// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>
#include <linux/sched/mm.h>

#include <linux/page_pinner.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

#include "internal.h"

struct follow_page_context {
	struct dev_pagemap *pgmap;
	unsigned int page_mask;
};

static void hpage_pincount_add(struct page *page, int refs)
{
	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
	VM_BUG_ON_PAGE(page != compound_head(page), page);

	atomic_add(refs, compound_pincount_ptr(page));
}

static void hpage_pincount_sub(struct page *page, int refs)
{
	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
	VM_BUG_ON_PAGE(page != compound_head(page), page);

	atomic_sub(refs, compound_pincount_ptr(page));
}

/* Equivalent to calling put_page() @refs times. */
static void put_page_refs(struct page *page, int refs)
{
#ifdef CONFIG_DEBUG_VM
	if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
		return;
#endif

	/*
	 * Calling put_page() for each ref is unnecessarily slow. Only the last
	 * ref needs a put_page().
	 */
	if (refs > 1)
		page_ref_sub(page, refs - 1);
	put_page(page);
}

/*
 * Return the compound head page with ref appropriately incremented,
 * or NULL if that failed.
 */
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
	struct page *head = compound_head(page);

	if (WARN_ON_ONCE(page_ref_count(head) < 0))
		return NULL;
	if (unlikely(!page_cache_add_speculative(head, refs)))
		return NULL;

	/*
	 * At this point we have a stable reference to the head page; but it
	 * could be that between the compound_head() lookup and the refcount
	 * increment, the compound page was split, in which case we'd end up
	 * holding a reference on a page that has nothing to do with the page
	 * we were given anymore.
	 * So now that the head page is stable, recheck that the pages still
	 * belong together.
	 */
	if (unlikely(compound_head(page) != head)) {
		put_page_refs(head, refs);
		return NULL;
	}

	return head;
}

/*
 * try_grab_compound_head() - attempt to elevate a page's refcount, by a
 * flags-dependent amount.
 *
 * "grab" names in this file mean, "look at flags to decide whether to use
 * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount."
 *
 * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
 * same time. (That's true throughout the get_user_pages*() and
 * pin_user_pages*() APIs.) Cases:
 *
 * FOLL_GET: page's refcount will be incremented by 1.
 * FOLL_PIN: page's refcount will be incremented by GUP_PIN_COUNTING_BIAS.
 *
 * Return: head page (with refcount appropriately incremented) for success, or
 * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's
 * considered failure, and furthermore, a likely bug in the caller, so a warning
 * is also emitted.
 */
static __maybe_unused struct page *try_grab_compound_head(struct page *page,
							   int refs,
							   unsigned int flags)
{
	if (flags & FOLL_GET) {
		struct page *head = try_get_compound_head(page, refs);
		if (head)
			set_page_pinner(head, compound_order(head));
		return head;
	} else if (flags & FOLL_PIN) {
		int orig_refs = refs;

		/*
		 * Can't do FOLL_LONGTERM + FOLL_PIN with CMA in the gup fast
		 * path, so fail and let the caller fall back to the slow path.
		 */
		if (unlikely(flags & FOLL_LONGTERM) &&
		    is_migrate_cma_page(page))
			return NULL;

		/*
		 * CAUTION: Don't use compound_head() on the page before this
		 * point, the result won't be stable.
		 */
		page = try_get_compound_head(page, refs);
		if (!page)
			return NULL;

		/*
		 * When pinning a compound page of order > 1 (which is what
		 * hpage_pincount_available() checks for), use an exact count to
		 * track it, via hpage_pincount_add/_sub().
		 *
		 * However, be sure to *also* increment the normal page refcount
		 * field at least once, so that the page really is pinned.
		 */
		if (hpage_pincount_available(page))
			hpage_pincount_add(page, refs);
		else
			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));

		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
				    orig_refs);

		return page;
	}

	WARN_ON_ONCE(1);
	return NULL;
}

static void put_compound_head(struct page *page, int refs, unsigned int flags)
{
	if (flags & FOLL_PIN) {
		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED,
				    refs);

		if (hpage_pincount_available(page))
			hpage_pincount_sub(page, refs);
		else
			refs *= GUP_PIN_COUNTING_BIAS;
	}

	if (flags & FOLL_GET)
		reset_page_pinner(page, compound_order(page));
	put_page_refs(page, refs);
}
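
/*
 * Illustrative sketch, not part of the original file: how a gup-fast style
 * caller pairs try_grab_compound_head() with put_compound_head() when a
 * later revalidation fails. The @still_mapped flag is a hypothetical
 * stand-in for the pte recheck that real callers perform after grabbing.
 */
static __maybe_unused int demo_grab_then_undo(struct page *page, int refs,
					      unsigned int flags,
					      bool still_mapped)
{
	struct page *head = try_grab_compound_head(page, refs, flags);

	if (!head)
		return -ENOMEM;	/* caller would fall back to the slow path */

	if (!still_mapped) {
		/* Undo exactly what the grab did, honouring @flags. */
		put_compound_head(head, refs, flags);
		return -EAGAIN;
	}

	return 0;
}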

/**
 * try_grab_page() - elevate a page's refcount by a flag-dependent amount
 *
 * This might not do anything at all, depending on the flags argument.
 *
 * "grab" names in this file mean, "look at flags to decide whether to use
 * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount."
 *
 * @page: pointer to page to be grabbed
 * @flags: gup flags: these are the FOLL_* flag values.
 *
 * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
 * time. Cases:
 *
 * FOLL_GET: page's refcount will be incremented by 1.
 * FOLL_PIN: page's refcount will be incremented by GUP_PIN_COUNTING_BIAS.
 *
 * Return: true for success, or if no action was required (if neither FOLL_PIN
 * nor FOLL_GET was set, nothing is done). False for failure: FOLL_GET or
 * FOLL_PIN was set, but the page could not be grabbed.
 */
bool __must_check try_grab_page(struct page *page, unsigned int flags)
{
	WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));

	if (flags & FOLL_GET) {
		bool ret = try_get_page(page);

		if (ret) {
			page = compound_head(page);
			set_page_pinner(page, compound_order(page));
		}
		return ret;
	} else if (flags & FOLL_PIN) {
		int refs = 1;

		page = compound_head(page);

		if (WARN_ON_ONCE(page_ref_count(page) <= 0))
			return false;

		if (hpage_pincount_available(page))
			hpage_pincount_add(page, 1);
		else
			refs = GUP_PIN_COUNTING_BIAS;

		/*
		 * Similar to try_grab_compound_head(): even if using the
		 * hpage_pincount_add/_sub() routines, be sure to
		 * *also* increment the normal page refcount field at least
		 * once, so that the page really is pinned.
		 */
		page_ref_add(page, refs);

		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
	}

	return true;
}

/**
 * unpin_user_page() - release a dma-pinned page
 * @page: pointer to page to be released
 *
 * Pages that were pinned via pin_user_pages*() must be released via either
 * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so
 * that such pages can be separately tracked and uniquely handled. In
 * particular, interactions with RDMA and filesystems need special handling.
 */
void unpin_user_page(struct page *page)
{
	put_compound_head(compound_head(page), 1, FOLL_PIN);
}
EXPORT_SYMBOL(unpin_user_page);
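
/*
 * Illustrative sketch, not part of the original file: the pairing rule from
 * the comment above. A page obtained from pin_user_pages*() is released with
 * unpin_user_page(), never with put_page(). The user address is hypothetical
 * and assumed valid.
 */
static __maybe_unused void demo_pin_then_unpin(unsigned long uaddr)
{
	struct page *page;
	int pinned;

	pinned = pin_user_pages_fast(uaddr, 1, FOLL_WRITE, &page);
	if (pinned != 1)
		return;

	/* ... DMA to/from the page, or direct access, goes here ... */

	unpin_user_page(page);
}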

/*
 * put_user_page() - release a page obtained using get_user_pages() or
 * follow_page(FOLL_GET)
 * @page: pointer to page to be released
 *
 * Pages that were obtained via get_user_pages()/follow_page(FOLL_GET) must be
 * released via put_user_page.
 * Note: if it's not a page from GUP or follow_page(FOLL_GET), it's harmless.
 */
void put_user_page(struct page *page)
{
	struct page *head = compound_head(page);

	reset_page_pinner(head, compound_order(head));
	put_page(page);
}
EXPORT_SYMBOL(put_user_page);
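
/*
 * Illustrative sketch, not part of the original file: releasing a reference
 * taken with try_grab_page() using the helper that matches the gup flags.
 * FOLL_PIN pairs with unpin_user_page(); in this tree FOLL_GET pairs with
 * put_user_page() so that the page_pinner record is cleared as well.
 */
static __maybe_unused int demo_grab_and_release(struct page *page,
						unsigned int flags)
{
	if (!try_grab_page(page, flags))
		return -ENOMEM;

	/* ... access the page while the reference or pin is held ... */

	if (flags & FOLL_PIN)
		unpin_user_page(page);
	else if (flags & FOLL_GET)
		put_user_page(page);

	return 0;
}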

/**
 * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
 * @pages: array of pages to be maybe marked dirty, and definitely released.
 * @npages: number of pages in the @pages array.
 * @make_dirty: whether to mark the pages dirty
 *
 * "gup-pinned page" refers to a page that has had one of the get_user_pages()
 * variants called on that page.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if @make_dirty is true, and if the page was previously
 * listed as clean. In any case, releases all pages using unpin_user_page(),
 * possibly via unpin_user_pages(), for the non-dirty case.
 *
 * Please see the unpin_user_page() documentation for details.
 *
 * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
 * required, then the caller should a) verify that this is really correct,
 * because _lock() is usually required, and b) hand code it:
 * set_page_dirty_lock(), unpin_user_page().
 *
 */
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
				 bool make_dirty)
{
	unsigned long index;

	/*
	 * TODO: this can be optimized for huge pages: if a series of pages is
	 * physically contiguous and part of the same compound page, then a
	 * single operation to the head page should suffice.
	 */

	if (!make_dirty) {
		unpin_user_pages(pages, npages);
		return;
	}

	for (index = 0; index < npages; index++) {
		struct page *page = compound_head(pages[index]);
		/*
		 * Checking PageDirty at this point may race with
		 * clear_page_dirty_for_io(), but that's OK. Two key
		 * cases:
		 *
		 * 1) This code sees the page as already dirty, so it
		 * skips the call to set_page_dirty(). That could happen
		 * because clear_page_dirty_for_io() called
		 * page_mkclean(), followed by set_page_dirty().
		 * However, now the page is going to get written back,
		 * which meets the original intention of setting it
		 * dirty, so all is well: clear_page_dirty_for_io() goes
		 * on to call TestClearPageDirty(), and write the page
		 * back.
		 *
		 * 2) This code sees the page as clean, so it calls
		 * set_page_dirty(). The page stays dirty, despite being
		 * written back, so it gets written back again in the
		 * next writeback cycle. This is harmless.
		 */
		if (!PageDirty(page))
			set_page_dirty_lock(page);
		unpin_user_page(page);
	}
}
EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
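
/*
 * Illustrative sketch, not part of the original file: a typical pin-for-DMA
 * sequence ending in unpin_user_pages_dirty_lock(). The device wrote into
 * the pages, so they are dirtied on release. @uaddr, @pages and @nr_pages
 * are hypothetical caller-provided values.
 */
static __maybe_unused void demo_pin_dma_unpin_dirty(unsigned long uaddr,
						    struct page **pages,
						    int nr_pages)
{
	int pinned;

	pinned = pin_user_pages_fast(uaddr, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned <= 0)
		return;

	/* ... program the device and wait for the DMA to complete ... */

	unpin_user_pages_dirty_lock(pages, pinned, true);
}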

/**
 * unpin_user_pages() - release an array of gup-pinned pages.
 * @pages: array of pages to be released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, release the page using unpin_user_page().
 *
 * Please see the unpin_user_page() documentation for details.
 */
void unpin_user_pages(struct page **pages, unsigned long npages)
{
	unsigned long index;

	/*
	 * If this WARN_ON() fires, then the system *might* be leaking pages (by
	 * leaving them pinned), but probably not. More likely, gup/pup returned
	 * a hard -ERRNO error to the caller, who erroneously passed it here.
	 */
	if (WARN_ON(IS_ERR_VALUE(npages)))
		return;
	/*
	 * TODO: this can be optimized for huge pages: if a series of pages is
	 * physically contiguous and part of the same compound page, then a
	 * single operation to the head page should suffice.
	 */
	for (index = 0; index < npages; index++)
		unpin_user_page(pages[index]);
}
EXPORT_SYMBOL(unpin_user_pages);
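
/*
 * Illustrative sketch, not part of the original file: the error handling
 * pattern that the WARN_ON() above guards against. A negative gup/pup return
 * value is an error code, not a page count, and must not reach
 * unpin_user_pages().
 */
static __maybe_unused void demo_unpin_only_on_success(unsigned long uaddr,
						      struct page **pages,
						      int nr_pages)
{
	int pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);

	if (pinned < 0)
		return;		/* hard error: nothing to unpin */

	/* ... use pages[0] .. pages[pinned - 1] ... */

	unpin_user_pages(pages, pinned);
}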

#ifdef CONFIG_MMU
static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables. Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) &&
			(vma_is_anonymous(vma) || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}

/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags,
		struct dev_pagemap **pgmap)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int ret;

	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
			 (FOLL_PIN | FOLL_GET)))
		return ERR_PTR(-EINVAL);
retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
		/*
		 * Only return device mapping pages in the FOLL_GET or FOLL_PIN
		 * case since they are only valid while holding the pgmap
		 * reference.
		 */
		*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
		if (*pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
	if (unlikely(!try_grab_page(page, flags))) {
		page = ERR_PTR(-ENOMEM);
		goto out;
	}
	/*
	 * We need to make the page accessible if and only if we are going
	 * to access its content (the FOLL_PIN case). Please see
	 * Documentation/core-api/pin_user_pages.rst for details.
	 */
	if (flags & FOLL_PIN) {
		ret = arch_make_page_accessible(page);
		if (ret) {
			unpin_user_page(page);
			page = ERR_PTR(ret);
			goto out;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

static struct page *follow_pmd_mask(struct vm_area_struct *vma,
				    unsigned long address, pud_t *pudp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	pmd_t *pmd, pmdval;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pmd = pmd_offset(pudp, address);
	/*
	 * The READ_ONCE() will stabilize the pmdval in a register or
	 * on the stack so that it will stop changing under the code.
	 */
	pmdval = READ_ONCE(*pmd);
	if (pmd_none(pmdval))
		return no_page_table(vma, flags);
	if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pmd_val(pmdval)), flags,
				      PMD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
retry:
	if (!pmd_present(pmdval)) {
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		VM_BUG_ON(thp_migration_supported() &&
			  !is_pmd_migration_entry(pmdval));
		if (is_pmd_migration_entry(pmdval))
			pmd_migration_entry_wait(mm, pmd);
		pmdval = READ_ONCE(*pmd);
		/*
		 * MADV_DONTNEED may convert the pmd to null because
		 * mmap_lock is held in read mode
		 */
		if (pmd_none(pmdval))
			return no_page_table(vma, flags);
		goto retry;
	}
	if (pmd_devmap(pmdval)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(pmdval)))
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);

	if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
		return no_page_table(vma, flags);

retry_locked:
	ptl = pmd_lock(mm, pmd);
	if (unlikely(pmd_none(*pmd))) {
		spin_unlock(ptl);
		return no_page_table(vma, flags);
	}
	if (unlikely(!pmd_present(*pmd))) {
		spin_unlock(ptl);
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		pmd_migration_entry_wait(mm, pmd);
		goto retry_locked;
	}
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
	}
	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else if (flags & FOLL_SPLIT) {
			if (unlikely(!try_get_page(page))) {
				spin_unlock(ptl);
				return ERR_PTR(-ENOMEM);
			}
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		} else {	/* flags & FOLL_SPLIT_PMD */
			spin_unlock(ptl);
			split_huge_pmd(vma, pmd, address);
			ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
	}
	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	ctx->page_mask = HPAGE_PMD_NR - 1;
	return page;
}

static struct page *follow_pud_mask(struct vm_area_struct *vma,
				    unsigned long address, p4d_t *p4dp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	pud_t *pud;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pud = pud_offset(p4dp, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pud_val(*pud)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pud_val(*pud)), flags,
				      PUD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	return follow_pmd_mask(vma, address, pud, flags, ctx);
}

static struct page *follow_p4d_mask(struct vm_area_struct *vma,
				    unsigned long address, pgd_t *pgdp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	p4d_t *p4d;
	struct page *page;

	p4d = p4d_offset(pgdp, address);
	if (p4d_none(*p4d))
		return no_page_table(vma, flags);
	BUILD_BUG_ON(p4d_huge(*p4d));
	if (unlikely(p4d_bad(*p4d)))
		return no_page_table(vma, flags);

	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(p4d_val(*p4d)), flags,
				      P4D_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	return follow_pud_mask(vma, address, p4d, flags, ctx);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * follow_page_mask - look up a page descriptor from a user-virtual address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * @vma: vm_area_struct mapping @address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * @address: virtual address to look up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * @flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * pointer to output page_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) * @flags can have FOLL_ flags set, defined in <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * the device's dev_pagemap metadata to avoid repeating expensive lookups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) * On output, the @ctx->page_mask is set according to the size of the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * Return: the mapped (struct page *), %NULL if no mapping exists, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * an error pointer if there is a mapping to something not represented
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) * by a page descriptor (see also vm_normal_page()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) static struct page *follow_page_mask(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) unsigned long address, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) struct follow_page_context *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) ctx->page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) /* make this handle hugepd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) if (!IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) pgd = pgd_offset(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) return no_page_table(vma, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) if (pgd_huge(*pgd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) page = follow_huge_pgd(mm, address, pgd, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) return no_page_table(vma, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) page = follow_huge_pd(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) __hugepd(pgd_val(*pgd)), flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) PGDIR_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) return no_page_table(vma, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) return follow_p4d_mask(vma, address, pgd, flags, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) unsigned int foll_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) struct follow_page_context ctx = { NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) page = follow_page_mask(vma, address, foll_flags, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (ctx.pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) put_dev_pagemap(ctx.pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) }
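
/*
 * Usage sketch (illustrative only, not part of this file's logic): a
 * hypothetical caller that uses follow_page() to look up the page backing
 * one user address. mmap_lock must be held across the call; because
 * FOLL_GET is passed, the caller owns a reference and must put_page() it
 * when finished.
 *
 *	static struct page *example_lookup_user_page(struct mm_struct *mm,
 *						     unsigned long addr)
 *	{
 *		struct vm_area_struct *vma;
 *		struct page *page = NULL;
 *
 *		mmap_read_lock(mm);
 *		vma = find_vma(mm, addr);
 *		if (vma && addr >= vma->vm_start) {
 *			// NULL or an error pointer both mean "no usable page"
 *			page = follow_page(vma, addr, FOLL_GET);
 *			if (IS_ERR_OR_NULL(page))
 *				page = NULL;
 *		}
 *		mmap_read_unlock(mm);
 *		return page;
 *	}
 */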
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) static int get_gate_page(struct mm_struct *mm, unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) unsigned int gup_flags, struct vm_area_struct **vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) struct page **page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) int ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) /* user gate pages are read-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) if (gup_flags & FOLL_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) if (address > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) pgd = pgd_offset_k(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) pgd = pgd_offset_gate(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (pgd_none(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) p4d = p4d_offset(pgd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (p4d_none(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) pud = pud_offset(p4d, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (pud_none(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) pmd = pmd_offset(pud, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) if (!pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) VM_BUG_ON(pmd_trans_huge(*pmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) pte = pte_offset_map(pmd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) if (pte_none(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) goto unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) *vma = get_gate_vma(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) *page = vm_normal_page(*vma, address, *pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) if (!*page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) goto unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) *page = pte_page(*pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (unlikely(!try_grab_page(*page, gup_flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) goto unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) unmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) pte_unmap(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * mmap_lock must be held on entry. If @locked != NULL and *@flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * does not include FOLL_NOWAIT, the mmap_lock may be released. If it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * is, *@locked will be set to 0 and -EBUSY returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) static int faultin_page(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) unsigned long address, unsigned int *flags, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) unsigned int fault_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) vm_fault_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) /* mlock all present pages, but do not fault in new pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (*flags & FOLL_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) fault_flags |= FAULT_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (*flags & FOLL_REMOTE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) fault_flags |= FAULT_FLAG_REMOTE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) if (*flags & FOLL_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) if (*flags & FOLL_TRIED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * Note: FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_TRIED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) * can co-exist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) fault_flags |= FAULT_FLAG_TRIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) ret = handle_mm_fault(vma, address, fault_flags, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (ret & VM_FAULT_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) int err = vm_fault_to_errno(ret, *flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (ret & VM_FAULT_RETRY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) *locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * necessary, even if maybe_mkwrite decided not to set pte_write. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * can thus safely do subsequent page lookups as if they were reads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) * But only do so when looping for pte_write is futile: in some cases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) * userspace may also want to write to the page it obtained, which a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) * read fault here might prevent (a read-only page might get re-COWed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) * by a userspace write).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) *flags |= FOLL_COW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) vm_flags_t vm_flags = vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) int write = (gup_flags & FOLL_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) int foreign = (gup_flags & FOLL_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (vm_flags & (VM_IO | VM_PFNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) if (!(vm_flags & VM_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) if (!(gup_flags & FOLL_FORCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * We used to let the write,force case do COW in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * set a breakpoint in a read-only mapping of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * executable, without corrupting the file (yet only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) * when that file had been opened for writing!).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) * Anon pages in shared mappings are surprising: now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) * we just reject that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) if (!is_cow_mapping(vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) } else if (!(vm_flags & VM_READ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) if (!(gup_flags & FOLL_FORCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * Is there actually any vma we can reach here which does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * have VM_MAYREAD set?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) if (!(vm_flags & VM_MAYREAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) * gups are always data accesses, not instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * fetches, so execute=false here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (!arch_vma_access_permitted(vma, write, false, foreign))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * __get_user_pages() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) * @locked: whether the mmap_lock is still held (see below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * Returns either number of pages pinned (which may be less than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * number requested), or an error. Details about the return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * -- If nr_pages is 0, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * -- If nr_pages is >0, but no pages were pinned, returns -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * -- If nr_pages is >0, and some pages were pinned, returns the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * pages pinned. Again, this may be less than nr_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) * -- 0 return value is possible when the fault would need to be retried.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * The caller is responsible for releasing returned @pages, via put_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) * @vmas are valid only as long as mmap_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * Must be called with mmap_lock held. It may be released. See below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * __get_user_pages walks a process's page tables and takes a reference to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * each struct page that each user address corresponds to at a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) * instant. That is, it takes the page that would be accessed if a user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) * thread accesses the given user virtual address at that instant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * This does not guarantee that the page exists in the user mappings when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * __get_user_pages returns, and there may even be a completely different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * page there in some cases (e.g. if mmapped pagecache has been invalidated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * and subsequently re-faulted). However it does guarantee that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * won't be freed completely. And mostly callers simply care that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * contains data that was valid *at some point in time*. Typically, an IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * or similar operation cannot guarantee anything stronger anyway because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * locks can't be held over the syscall boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * the page is written to, set_page_dirty (or set_page_dirty_lock, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * appropriate) must be called after the page is finished with, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * before put_page is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * If @locked != NULL, *@locked will be set to 0 when mmap_lock is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * released by an up_read(). That can happen if @gup_flags does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * have FOLL_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) * A caller using such a combination of @locked and @gup_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * must therefore hold the mmap_lock for reading only, and recognize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * when it's been released. Otherwise, it must be held for either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * reading or writing and will not be released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * In most cases, get_user_pages or get_user_pages_fast should be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * instead of __get_user_pages. __get_user_pages should be used only if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * you need some special @gup_flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) static long __get_user_pages(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) long ret = 0, i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) struct follow_page_context ctx = { NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) start = untagged_addr(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * If FOLL_FORCE is set then do not force a full fault as the hinting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) * fault information is unrelated to the reference behaviour of a task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) * using the address space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (!(gup_flags & FOLL_FORCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) gup_flags |= FOLL_NUMA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) unsigned int foll_flags = gup_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) unsigned int page_increm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) /* first iteration or crossing a vma boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (!vma || start >= vma->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) vma = find_extend_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) if (!vma && in_gate_area(mm, start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) ret = get_gate_page(mm, start & PAGE_MASK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) gup_flags, &vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) pages ? &pages[i] : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) ctx.page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) goto next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) if (!vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) ret = check_vma_flags(vma, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (is_vm_hugetlb_page(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) i = follow_hugetlb_page(mm, vma, pages, vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) &start, &nr_pages, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) gup_flags, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (locked && *locked == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * We've got a VM_FAULT_RETRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * and we've lost mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * We must stop here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) BUG_ON(gup_flags & FOLL_NOWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) BUG_ON(ret != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * If we have a pending SIGKILL, don't keep faulting pages and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * potentially allocating memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) page = follow_page_mask(vma, start, foll_flags, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) ret = faultin_page(vma, start, &foll_flags, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) case -EBUSY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) case -EFAULT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) case -ENOMEM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) case -EHWPOISON:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) case -ENOENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) goto next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) } else if (PTR_ERR(page) == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * Proper page table entry exists, but no corresponding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * struct page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) goto next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) } else if (IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) ret = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) if (pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) flush_anon_page(vma, page, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) ctx.page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) next_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (vmas) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) vmas[i] = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) ctx.page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (page_increm > nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) page_increm = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) i += page_increm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) start += page_increm * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) nr_pages -= page_increm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) } while (nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) if (ctx.pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) put_dev_pagemap(ctx.pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) return i ? i : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
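
/*
 * Usage sketch (illustrative only, not part of this file): a hypothetical
 * caller of the public get_user_pages() wrapper defined later in this file,
 * following the rules above -- pin with FOLL_WRITE, write through a kernel
 * mapping, mark the page dirty, and only then drop the reference. It assumes
 * the 4-byte value does not cross a page boundary.
 *
 *	static int example_poke_user_page(unsigned long addr, u32 val)
 *	{
 *		struct page *page;
 *		void *kaddr;
 *		long got;
 *
 *		mmap_read_lock(current->mm);
 *		got = get_user_pages(addr & PAGE_MASK, 1, FOLL_WRITE, &page, NULL);
 *		mmap_read_unlock(current->mm);
 *		if (got != 1)
 *			return got < 0 ? got : -EFAULT;
 *
 *		kaddr = kmap(page);
 *		memcpy(kaddr + offset_in_page(addr), &val, sizeof(val));
 *		kunmap(page);
 *
 *		set_page_dirty_lock(page);	// dirty before put_page(), as above
 *		put_page(page);
 *		return 0;
 *	}
 */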
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) static bool vma_permits_fault(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) unsigned int fault_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) bool write = !!(fault_flags & FAULT_FLAG_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) if (!(vm_flags & vma->vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) * The architecture might have a hardware protection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) * mechanism other than read/write that can deny access.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * gup always represents data access, not instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * fetches, so execute=false here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (!arch_vma_access_permitted(vma, write, false, foreign))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * fixup_user_fault() - manually resolve a user page fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * @address: user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * @fault_flags: flags to pass down to handle_mm_fault()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * @unlocked: set to true if the mmap_lock was unlocked while retrying; may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) *		NULL if the caller does not allow retry. If NULL, the caller must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) *		guarantee that fault_flags does not contain FAULT_FLAG_ALLOW_RETRY.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * This is meant to be called in the specific scenario where, for locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * reasons, we try to access user memory in atomic context (within a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * pagefault_disable() section), that access returns -EFAULT, and we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * resolve the user fault before trying again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * Typically this is meant to be used by the futex code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) * The main difference with get_user_pages() is that this function will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) * unconditionally call handle_mm_fault() which will in turn perform all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * necessary SW fixup of the dirty and young bits in the PTE, while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * get_user_pages() only guarantees to update these in the struct page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) * This is important for some architectures where those bits also gate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) * access permission to the page because they are maintained in software. On
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * such architectures, gup() will not be enough to make a subsequent access
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * This function will not return with an unlocked mmap_lock, so it does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * have the same semantics wrt the @mm->mmap_lock as filemap_fault() does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) int fixup_user_fault(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) unsigned long address, unsigned int fault_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) bool *unlocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) vm_fault_t ret, major = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) address = untagged_addr(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) if (unlocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) vma = find_extend_vma(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (!vma || address < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (!vma_permits_fault(vma, fault_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if ((fault_flags & FAULT_FLAG_KILLABLE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) ret = handle_mm_fault(vma, address, fault_flags, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) major |= ret & VM_FAULT_MAJOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) if (ret & VM_FAULT_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) int err = vm_fault_to_errno(ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (ret & VM_FAULT_RETRY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) *unlocked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) fault_flags |= FAULT_FLAG_TRIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) EXPORT_SYMBOL_GPL(fixup_user_fault);
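
/*
 * Usage sketch (illustrative only, not part of this file): the futex-style
 * pattern described above -- try the access with page faults disabled, and
 * on failure resolve the fault with fixup_user_fault() before retrying. The
 * helper name is hypothetical; a write access would pass FAULT_FLAG_WRITE
 * instead of 0.
 *
 *	static int example_read_user_u32(u32 __user *uaddr, u32 *val)
 *	{
 *		struct mm_struct *mm = current->mm;
 *		bool unlocked = false;
 *		int ret;
 *
 *	again:
 *		pagefault_disable();
 *		ret = __get_user(*val, uaddr);
 *		pagefault_enable();
 *		if (!ret)
 *			return 0;
 *
 *		mmap_read_lock(mm);
 *		ret = fixup_user_fault(mm, (unsigned long)uaddr, 0, &unlocked);
 *		mmap_read_unlock(mm);
 *		if (ret)
 *			return ret;
 *		goto again;
 *	}
 */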
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) * Please note that this function, unlike __get_user_pages(), will not return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) * 0 for nr_pages > 0 unless FOLL_NOWAIT is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) int *locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) long ret, pages_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) bool lock_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /* if VM_FAULT_RETRY can be returned, vmas become invalid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) BUG_ON(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) /* check caller initialized locked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) BUG_ON(*locked != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) atomic_set(&mm->has_pinned, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * is to set FOLL_GET if the caller wants pages[] filled in (but has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * carelessly failed to specify FOLL_GET), so keep doing that, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) * for FOLL_GET, not for the newer FOLL_PIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * FOLL_PIN always expects pages to be non-null, but no need to assert
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * that here, as any failures will be obvious enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) if (pages && !(flags & FOLL_PIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) flags |= FOLL_GET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) pages_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) lock_dropped = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) ret = __get_user_pages(mm, start, nr_pages, flags, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) vmas, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* VM_FAULT_RETRY couldn't trigger, bypass */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* VM_FAULT_RETRY cannot return errors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) if (!*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) BUG_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) BUG_ON(ret >= nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) nr_pages -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) pages_done += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) if (*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * VM_FAULT_RETRY didn't trigger or it was a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * FOLL_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) pages_done = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * VM_FAULT_RETRY triggered, so seek to the faulting offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * For the prefault case (!pages) we only update counts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (likely(pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) pages += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) start += ret << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) lock_dropped = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) * Repeat on the address that fired VM_FAULT_RETRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) * with both FAULT_FLAG_ALLOW_RETRY and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) * FAULT_FLAG_TRIED. Note that GUP can be interrupted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) * by fatal signals, so we need to check for them before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) * trying again; otherwise this can loop forever.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) pages_done = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) ret = mmap_read_lock_killable(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) BUG_ON(ret > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) pages_done = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) *locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) pages, NULL, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) if (!*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) /* Continue to retry until we succeed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) BUG_ON(ret != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) if (ret != 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) BUG_ON(ret > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) pages_done = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) nr_pages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) pages_done++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) if (likely(pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) start += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (lock_dropped && *locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * We must let the caller know we temporarily dropped the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) * and so the critical section protected by it was lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) *locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) return pages_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) }
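
/*
 * Usage sketch (illustrative only, not part of this file): the "locked"
 * calling convention implemented by the helper above, as seen through the
 * public get_user_pages_locked() wrapper defined later in this file. The
 * caller takes mmap_lock but must re-check *locked afterwards, because the
 * lock may have been dropped on its behalf (in which case *locked is 0 and
 * the caller must not unlock again).
 *
 *	static long example_pin_for_write(unsigned long start, unsigned long nr,
 *					  struct page **pages)
 *	{
 *		int locked = 1;
 *		long ret;
 *
 *		mmap_read_lock(current->mm);
 *		ret = get_user_pages_locked(start, nr, FOLL_WRITE, pages, &locked);
 *		if (locked)
 *			mmap_read_unlock(current->mm);
 *		// on success, pages[0..ret) each hold a reference (put_page() later)
 *		return ret;
 *	}
 */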
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) * populate_vma_page_range() - populate a range of pages in the vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) * @vma: target vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * @start: start address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) * @end: end address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * @locked: whether the mmap_lock is still held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * This takes care of mlocking the pages too if VM_LOCKED is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) * Return either the number of pages pinned in the vma, or a negative error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * code on error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) * vma->vm_mm->mmap_lock must be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * If @locked is NULL, it may be held for read or write and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) * be unperturbed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) * If @locked is non-NULL, it must be held for read only and may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * released. If it's released, *@locked will be set to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) long populate_vma_page_range(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) unsigned long start, unsigned long end, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) unsigned long nr_pages = (end - start) / PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) int gup_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) VM_BUG_ON(start & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) VM_BUG_ON(end & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) VM_BUG_ON_VMA(start < vma->vm_start, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) VM_BUG_ON_VMA(end > vma->vm_end, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) mmap_assert_locked(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) if (vma->vm_flags & VM_LOCKONFAULT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) gup_flags &= ~FOLL_POPULATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * We want to touch writable mappings with a write fault in order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * to break COW, except for shared mappings because these don't COW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * and we would not want to dirty them for nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) gup_flags |= FOLL_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * We want mlock to succeed for regions that have any permissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * other than PROT_NONE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) if (vma_is_accessible(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) gup_flags |= FOLL_FORCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * We made sure addr is within a VMA, so the following will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * not result in a stack expansion that recurses back here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) return __get_user_pages(mm, start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) NULL, NULL, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * __mm_populate - populate and/or mlock pages within a range of address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * flags. VMAs must be already marked with the desired vm_flags, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * mmap_lock must not be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) unsigned long end, nstart, nend;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) int locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) end = start + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) for (nstart = start; nstart < end; nstart = nend) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * We want to fault in pages for [nstart; end) address range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) * Find first corresponding VMA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) if (!locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) vma = find_vma(mm, nstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) } else if (nstart >= vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) vma = vma->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (!vma || vma->vm_start >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * Set [nstart; nend) to intersection of desired address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * range with the first VMA. Also, skip undesirable VMA types.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) nend = min(end, vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) if (vma->vm_flags & (VM_IO | VM_PFNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) if (nstart < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) nstart = vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) * Now fault in a range of pages. populate_vma_page_range()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) * double checks the vma flags, so that it won't mlock pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) * if the vma was already munlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) ret = populate_vma_page_range(vma, nstart, nend, &locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) if (ignore_errors) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) continue; /* continue at next VMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) nend = nstart + ret * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) return ret; /* 0 or negative error code */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) }
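
/*
 * Usage sketch (illustrative only, not part of this file): how an
 * mlock()-style path ends up here -- the VMAs are marked VM_LOCKED under the
 * write lock, the lock is dropped, and only then is the range faulted in.
 * example_mark_vmas_locked() is a hypothetical stand-in for that first step;
 * "start" and "len" are assumed page-aligned.
 *
 *	static int example_lock_range(unsigned long start, size_t len)
 *	{
 *		int error;
 *
 *		mmap_write_lock(current->mm);
 *		error = example_mark_vmas_locked(start, len);	// hypothetical step
 *		mmap_write_unlock(current->mm);
 *		if (error)
 *			return error;
 *
 *		// __mm_populate() takes and releases mmap_lock by itself;
 *		// 0 means "do not ignore errors"
 *		return __mm_populate(start, len, 0);
 *	}
 */
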
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) #else /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) unsigned long nr_pages, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) struct vm_area_struct **vmas, int *locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) unsigned int foll_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) unsigned long vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) /* calculate required read or write permissions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * If FOLL_FORCE is set, we only require the "MAY" flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) vm_flags = (foll_flags & FOLL_WRITE) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) vm_flags &= (foll_flags & FOLL_FORCE) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) vma = find_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) if (!vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) goto finish_or_fault;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) /* protect what we can, including chardevs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) !(vm_flags & vma->vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) goto finish_or_fault;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) if (pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) pages[i] = virt_to_page(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (pages[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) get_page(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (vmas)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) vmas[i] = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) start = (start + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) finish_or_fault:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) return i ? : -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) #endif /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) * get_dump_page() - pin user page in memory while writing it to core dump
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) * @addr: user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) * Returns struct page pointer of user page pinned for dump,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) * to be freed afterwards by put_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) * Returns NULL on any kind of failure - a hole must then be inserted into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) * the corefile, to preserve alignment with its headers; and also returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) * allowing a hole to be left in the corefile to save disk space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * Called without mmap_lock (takes and releases the mmap_lock by itself).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) #ifdef CONFIG_ELF_CORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) struct page *get_dump_page(unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) int locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (mmap_read_lock_killable(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) FOLL_FORCE | FOLL_DUMP | FOLL_GET);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) return (ret == 1) ? page : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) #endif /* CONFIG_ELF_CORE */
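
/*
 * Editorial illustration (not part of the original source): a minimal sketch
 * of how a core-dump writer might consume get_dump_page(), per the kerneldoc
 * above. The emit_page()/emit_hole() helpers are hypothetical placeholders
 * for whatever the dump code uses to write page data and aligned holes; the
 * in-tree ELF core dump code differs in detail.
 *
 *	unsigned long addr;
 *
 *	for (addr = start; addr < end; addr += PAGE_SIZE) {
 *		struct page *page = get_dump_page(addr);
 *
 *		if (page) {
 *			emit_page(cprm, page);
 *			put_page(page);
 *		} else {
 *			emit_hole(cprm, PAGE_SIZE);
 *		}
 *	}
 */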
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) static long check_and_migrate_cma_pages(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) unsigned long i, isolation_error_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) bool drain_allow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) LIST_HEAD(cma_page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) long ret = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) struct page *prev_head, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) struct migration_target_control mtc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) .nid = NUMA_NO_NODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) check_again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) prev_head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) isolation_error_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) drain_allow = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) head = compound_head(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) if (head == prev_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) prev_head = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * If we get pages from the CMA zone, since we are going to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * be pinning these entries, we might as well move them out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) * of the CMA zone if possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) if (is_migrate_cma_page(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) if (PageHuge(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (!isolate_huge_page(head, &cma_page_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) isolation_error_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (!PageLRU(head) && drain_allow) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) drain_allow = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (isolate_lru_page(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) isolation_error_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) list_add_tail(&head->lru, &cma_page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) mod_node_page_state(page_pgdat(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) page_is_file_lru(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) thp_nr_pages(head));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) * If the list is empty and there were no isolation errors, all pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) * are already in the correct zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) if (list_empty(&cma_page_list) && !isolation_error_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) if (!list_empty(&cma_page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) * drop the above get_user_pages reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) if (gup_flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) unpin_user_pages(pages, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) for (i = 0; i < nr_pages; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) put_page(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) ret = migrate_pages(&cma_page_list, alloc_migration_target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) NULL, (unsigned long)&mtc, MIGRATE_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) MR_CONTIG_RANGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) if (!list_empty(&cma_page_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) putback_movable_pages(&cma_page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) return ret > 0 ? -ENOMEM : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) /* We unpinned pages before migration, pin them again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) NULL, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) if (ret <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) nr_pages = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) * check again because pages were unpinned, and we also might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * had isolation errors and need more pages to migrate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) goto check_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) static long check_and_migrate_cma_pages(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) #endif /* CONFIG_CMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) * allows us to process the FOLL_LONGTERM flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) static long __gup_longterm_locked(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) long rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) if (gup_flags & FOLL_LONGTERM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) flags = memalloc_nocma_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) if (gup_flags & FOLL_LONGTERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) if (rc > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) rc = check_and_migrate_cma_pages(mm, start, rc, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) vmas, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) memalloc_nocma_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) static bool is_valid_gup_flags(unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * never directly by the caller, so enforce that with an assertion:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) * FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) * that is, FOLL_LONGTERM is a specific, more restrictive case of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * FOLL_PIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) static long __get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) * Parts of FOLL_LONGTERM behavior are incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * vmas. However, this only comes up if locked is set, and there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) * callers that do request FOLL_LONGTERM, but do not set locked. So,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) * allow what we can.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) if (gup_flags & FOLL_LONGTERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) if (WARN_ON_ONCE(locked))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * This will check the vmas (even if our vmas arg is NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) * and return -ENOTSUPP if DAX isn't allowed in this case:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) return __gup_longterm_locked(mm, start, nr_pages, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) vmas, gup_flags | FOLL_TOUCH |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) FOLL_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) gup_flags | FOLL_TOUCH | FOLL_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) * get_user_pages_remote() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) * @locked: pointer to lock flag indicating whether lock is held and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) * subsequently whether VM_FAULT_RETRY functionality can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) * utilised. Lock must initially be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) * Returns either the number of pages pinned (which may be less than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) * number requested), or an error. Details about the return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) * -- If nr_pages is 0, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) * -- If nr_pages is >0, but no pages were pinned, returns -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) * -- If nr_pages is >0, and some pages were pinned, returns the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) * pages pinned. Again, this may be less than nr_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) * The caller is responsible for releasing returned @pages, via put_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) * @vmas are valid only as long as mmap_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) * Must be called with mmap_lock held for read or write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) * get_user_pages_remote walks a process's page tables and takes a reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * to each struct page that each user address corresponds to at a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) * instant. That is, it takes the page that would be accessed if a user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) * thread accesses the given user virtual address at that instant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) * This does not guarantee that the page exists in the user mappings when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) * get_user_pages_remote returns, and there may even be a completely different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) * page there in some cases (e.g. if mmapped pagecache has been invalidated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * and subsequently re-faulted). However, it does guarantee that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * won't be freed completely. And most callers simply care that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) * contains data that was valid *at some point in time*. Typically, an IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * or similar operation cannot guarantee anything stronger anyway because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) * locks can't be held over the syscall boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) * be called after the page is finished with, and before put_page is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) * get_user_pages_remote is typically used for fewer-copy IO operations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) * to get a handle on the memory by some means other than accesses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) * via the user virtual addresses. The pages may be submitted for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) * DMA to devices or accessed via their kernel linear mapping (via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) * kmap APIs). Care should be taken to use the correct cache flushing APIs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * See also get_user_pages_fast, for performance critical applications.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * get_user_pages_remote should be phased out in favor of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * should use get_user_pages_remote because it cannot pass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) long get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (!is_valid_gup_flags(gup_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) pages, vmas, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) EXPORT_SYMBOL(get_user_pages_remote);
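
/*
 * Editorial illustration (not part of the original source): a minimal sketch
 * of the calling pattern described in the kerneldoc above, for a caller that
 * already holds a reference on a foreign @mm and wants to modify one of its
 * pages. "buf" and "len" (len <= PAGE_SIZE) are placeholder caller-owned
 * variables; error handling and page-offset handling are elided.
 *
 *	struct page *page;
 *	long pinned;
 *
 *	mmap_read_lock(mm);
 *	pinned = get_user_pages_remote(mm, addr, 1, FOLL_WRITE, &page,
 *				       NULL, NULL);
 *	mmap_read_unlock(mm);
 *	if (pinned == 1) {
 *		void *kaddr = kmap(page);
 *
 *		memcpy(kaddr, buf, len);
 *		kunmap(page);
 *		set_page_dirty_lock(page);
 *		put_page(page);
 *	}
 */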
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) #else /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) long get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) static long __get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) #endif /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) * get_user_pages() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * This is the same as get_user_pages_remote(), just with a less-flexible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * calling convention where we assume that the mm being operated on belongs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * the current task, and doesn't allow passing of a locked parameter. We also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) * obviously don't pass FOLL_REMOTE in here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) long get_user_pages(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) struct vm_area_struct **vmas)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) if (!is_valid_gup_flags(gup_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) return __gup_longterm_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) pages, vmas, gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) EXPORT_SYMBOL(get_user_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * get_user_pages_locked() is suitable to replace the form:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) * mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * do_something()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) * get_user_pages(..., pages, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) * mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * int locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) * do_something()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) * get_user_pages_locked(..., pages, &locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) * if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) * mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) * @locked: pointer to lock flag indicating whether lock is held and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) * subsequently whether VM_FAULT_RETRY functionality can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) * utilised. Lock must initially be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) * We can leverage the VM_FAULT_RETRY functionality in the page fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) * paths better by using either get_user_pages_locked() or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) * get_user_pages_unlocked().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) * FIXME: Current FOLL_LONGTERM behavior is incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) * vmas. As there are no users of this flag in this call, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) * disallow this option for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) * never directly by the caller, so enforce that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) return __get_user_pages_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) pages, NULL, locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) EXPORT_SYMBOL(get_user_pages_locked);
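
/*
 * Editorial illustration (not part of the original source): the replacement
 * pattern from the comment above, written out against this function's actual
 * signature (it always operates on current->mm and takes no mm argument).
 * "pages" and "npages" are placeholder caller-owned variables.
 *
 *	int locked = 1;
 *	long got;
 *
 *	mmap_read_lock(current->mm);
 *	got = get_user_pages_locked(start, npages, FOLL_WRITE, pages, &locked);
 *	if (locked)
 *		mmap_read_unlock(current->mm);
 *
 * If mmap_lock had to be dropped while faulting pages in, "locked" is cleared
 * on return and the lock is no longer held, so the "if (locked)" test skips
 * the extra unlock.
 */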
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) * get_user_pages_unlocked() is suitable to replace the form:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) * mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) * get_user_pages(..., pages, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * with:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) * get_user_pages_unlocked(..., pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) * It is functionally equivalent to get_user_pages_fast, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) * get_user_pages_fast should be used instead if specific gup_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) * (e.g. FOLL_FORCE) are not required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) struct page **pages, unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) int locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) * FIXME: Current FOLL_LONGTERM behavior is incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) * vmas. As there are no users of this flag in this call, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) * disallow this option for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) &locked, gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) EXPORT_SYMBOL(get_user_pages_unlocked);
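
/*
 * Editorial illustration (not part of the original source): a minimal sketch
 * of the single-call form described above, for a caller that needs a specific
 * flag such as FOLL_FORCE and therefore does not simply use
 * get_user_pages_fast(). mmap_lock handling is entirely internal; "pages" and
 * "npages" are placeholder caller-owned variables.
 *
 *	long i, got;
 *
 *	got = get_user_pages_unlocked(start, npages, pages, FOLL_FORCE);
 *	if (got < 0)
 *		return got;
 *	... copy data out of the pinned pages ...
 *	for (i = 0; i < got; i++)
 *		put_page(pages[i]);
 */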
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) * Fast GUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) * get_user_pages_fast attempts to pin user pages by walking the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) * tables directly and avoids taking locks. Thus the walker needs to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) * protected from page table pages being freed from under it, and should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) * block any THP splits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) * One way to achieve this is to have the walker disable interrupts, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) * rely on IPIs from the TLB flushing code blocking before the page table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) * pages are freed. This is unsuitable for architectures that do not need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) * to broadcast an IPI when invalidating TLBs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) * Another way to achieve this is to batch up the page-table-containing pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) * belonging to more than one mm_user, then rcu_sched a callback to free those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) * pages. Disabling interrupts will allow the fast_gup walker to both block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) * the rcu_sched callback, and an IPI that we broadcast for splitting THPs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) * (which is a relatively rare event). The code below adopts this strategy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) * Before activating this code, please be aware that the following assumptions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * are currently made:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * *) Either MMU_GATHER_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) * free pages containing page tables or TLB flushing requires IPI broadcast.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) * *) ptes can be read atomically by the architecture.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) * *) access_ok is sufficient to validate userspace address ranges.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) * The last two assumptions can be relaxed by the addition of helper functions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) * This code is based heavily on the PowerPC implementation by Nick Piggin.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) */
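
/*
 * Editorial illustration (not part of the original source): the protection
 * scheme described above reduces to the pattern below, where
 * walk_page_tables_lockless() is a hypothetical stand-in for the gup_*_range()
 * walkers defined in this file. With interrupts disabled on the local CPU,
 * neither the rcu_sched callback that frees page-table pages nor a broadcast
 * IPI can complete until the walk is done, which is what keeps the page tables
 * and THPs stable underneath the walker.
 *
 *	unsigned long flags;
 *	int nr_pinned = 0;
 *
 *	local_irq_save(flags);
 *	walk_page_tables_lockless(start, end, gup_flags, pages, &nr_pinned);
 *	local_irq_restore(flags);
 */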
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) #ifdef CONFIG_HAVE_FAST_GUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) * WARNING: only to be used in the get_user_pages_fast() implementation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) * With get_user_pages_fast(), we walk down the pagetables without taking any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) * locks. For this we would like to load the pointers atomically, but sometimes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) * we do have is the guarantee that a PTE will only either go from not present
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) * to present, or present to not present, or both -- it will not switch to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) * completely different present page without a TLB flush in between; something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) * that we are blocking by holding interrupts off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) * Setting ptes from not present to present goes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) * ptep->pte_high = h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) * smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) * ptep->pte_low = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) * And present to not present goes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) * ptep->pte_low = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) * smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) * ptep->pte_high = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * We load pte_high *after* loading pte_low, which ensures we don't see an older
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) * picked up a changed pte_high. We might have gotten rubbish values from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) * pte_low and pte_high, but we are guaranteed that pte_low will not have the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) * operates on present ptes we're safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) static inline pte_t gup_get_pte(pte_t *ptep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) pte.pte_low = ptep->pte_low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) pte.pte_high = ptep->pte_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) } while (unlikely(pte.pte_low != ptep->pte_low));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) return pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) #else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) * We require that the PTE can be read atomically.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) static inline pte_t gup_get_pte(pte_t *ptep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) return ptep_get(ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) #endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) while ((*nr) - nr_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) struct page *page = pages[--(*nr)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) ClearPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) if (flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) unpin_user_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) struct dev_pagemap *pgmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) int nr_start = *nr, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) pte_t *ptep, *ptem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) ptem = ptep = pte_offset_map(&pmd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) pte_t pte = gup_get_pte(ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) * Similar to the PMD case below, NUMA hinting must take the slow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) * path using the pte_protnone check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) if (pte_protnone(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if (!pte_access_permitted(pte, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) if (pte_devmap(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) if (unlikely(flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) if (unlikely(!pgmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) } else if (pte_special(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) page = pte_page(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) head = try_grab_compound_head(page, 1, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) if (unlikely(pte_val(pte) != pte_val(*ptep))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) put_compound_head(head, 1, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) VM_BUG_ON_PAGE(compound_head(page) != head, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) * We need to make the page accessible if and only if we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) * going to access its content (the FOLL_PIN case). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) * see Documentation/core-api/pin_user_pages.rst for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) * details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) if (flags & FOLL_PIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) ret = arch_make_page_accessible(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) unpin_user_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) SetPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) pages[*nr] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) (*nr)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) } while (ptep++, addr += PAGE_SIZE, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) pte_unmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) if (pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) put_dev_pagemap(pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) pte_unmap(ptem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * If we can't determine whether or not a pte is special, then fail immediately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * to be special.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * For a futex to be placed on a THP tail page, get_futex_key requires a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * get_user_pages_fast_only implementation that can pin pages. Thus it's still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * useful to have gup_huge_pmd even if we can't operate on ptes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) static int __gup_device_huge(unsigned long pfn, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) int nr_start = *nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) struct dev_pagemap *pgmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) struct page *page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) pgmap = get_dev_pagemap(pfn, pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) if (unlikely(!pgmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) SetPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) pages[*nr] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) if (unlikely(!try_grab_page(page, flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) (*nr)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) } while (addr += PAGE_SIZE, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) if (pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) put_dev_pagemap(pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) unsigned long fault_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) int nr_start = *nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) unsigned long fault_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) int nr_start = *nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) if (unlikely(pud_val(orig) != pud_val(*pudp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) BUILD_BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) BUILD_BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) static int record_subpages(struct page *page, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) unsigned long end, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) for (nr = 0; addr != end; addr += PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) pages[nr++] = page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) return nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) #ifdef CONFIG_ARCH_HAS_HUGEPD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) unsigned long sz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) unsigned long __boundary = (addr + sz) & ~(sz-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) return (__boundary - 1 < end - 1) ? __boundary : end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) unsigned long pte_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) pte_end = (addr + sz) & ~(sz-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (pte_end < end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) end = pte_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) pte = huge_ptep_get(ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) if (!pte_access_permitted(pte, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) /* hugepages are never "special" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) head = pte_page(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) head = try_grab_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) if (unlikely(pte_val(pte) != pte_val(*ptep))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) unsigned int pdshift, unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) unsigned long sz = 1UL << hugepd_shift(hugepd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) ptep = hugepte_offset(hugepd, addr, pdshift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) next = hugepte_addr_end(addr, end, sz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) } while (ptep++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) unsigned int pdshift, unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) #endif /* CONFIG_ARCH_HAS_HUGEPD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) if (pmd_devmap(orig)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) if (unlikely(flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) return __gup_device_huge_pmd(orig, pmdp, addr, end, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) pages, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) head = try_grab_compound_head(pmd_page(orig), refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (!pud_access_permitted(orig, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) if (pud_devmap(orig)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) if (unlikely(flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) return __gup_device_huge_pud(orig, pudp, addr, end, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) pages, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) head = try_grab_compound_head(pud_page(orig), refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (unlikely(pud_val(orig) != pud_val(*pudp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) BUILD_BUG_ON(pgd_devmap(orig));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) head = try_grab_compound_head(pgd_page(orig), refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) pmd_t *pmdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) pmdp = pmd_offset_lockless(pudp, pud, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) pmd_t pmd = READ_ONCE(*pmdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) next = pmd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) if (!pmd_present(pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) pmd_devmap(pmd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) * NUMA hinting faults need to be handled in the GUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) * slowpath for accounting purposes and so that they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) * can be serialised against THP migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) if (pmd_protnone(pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) * Architectures can use a different page table format for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) * hugetlbfs pmds than for THP pmds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) PMD_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) } while (pmdp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) pud_t *pudp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) pudp = pud_offset_lockless(p4dp, p4d, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) pud_t pud = READ_ONCE(*pudp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) next = pud_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) if (unlikely(!pud_present(pud)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) if (unlikely(pud_huge(pud))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) if (!gup_huge_pud(pud, pudp, addr, next, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) PUD_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) } while (pudp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) p4d_t *p4dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) p4dp = p4d_offset_lockless(pgdp, pgd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) p4d_t p4d = READ_ONCE(*p4dp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) next = p4d_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) if (p4d_none(p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) BUILD_BUG_ON(p4d_huge(p4d));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) P4D_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) } while (p4dp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static void gup_pgd_range(unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) pgd_t *pgdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) pgdp = pgd_offset(current->mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) pgd_t pgd = READ_ONCE(*pgdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) next = pgd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) if (pgd_none(pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) if (unlikely(pgd_huge(pgd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) PGDIR_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) } while (pgdp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) static inline void gup_pgd_range(unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) #endif /* CONFIG_HAVE_FAST_GUP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) #ifndef gup_fast_permitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) * Check if it's allowed to use get_user_pages_fast_only() for the range, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) * we need to fall back to the slow version:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) static bool gup_fast_permitted(unsigned long start, unsigned long end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) #endif
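/*
 * A rough sketch of the kind of override an architecture can supply instead
 * of the generic stub above. The exact check is arch-specific; bounding the
 * range to TASK_SIZE_MAX here is only an assumption, modelled on
 * architectures that restrict fast GUP to user addresses. Defining the macro
 * is what makes the #ifndef above skip the generic version:
 *
 *	static inline bool gup_fast_permitted(unsigned long start,
 *					      unsigned long end)
 *	{
 *		return end <= TASK_SIZE_MAX;	// assumed arch-specific bound
 *	}
 *	#define gup_fast_permitted gup_fast_permitted
 */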
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) * FIXME: FOLL_LONGTERM does not work with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) * get_user_pages_unlocked() (see comments in that function)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) if (gup_flags & FOLL_LONGTERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) mmap_read_lock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) ret = __gup_longterm_locked(current->mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) pages, NULL, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) ret = get_user_pages_unlocked(start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) pages, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) static unsigned long lockless_pages_from_mm(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) unsigned int gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) int nr_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) !gup_fast_permitted(start, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (gup_flags & FOLL_PIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) seq = raw_read_seqcount(&current->mm->write_protect_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) if (seq & 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) * Disable interrupts. The nested form is used in order to allow full,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) * general-purpose use of this routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) * With interrupts disabled, we block page table pages from being freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) * from under us. See struct mmu_table_batch comments in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) * include/asm-generic/tlb.h for more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) * We do not adopt an rcu_read_lock() here as we also want to block IPIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) * that come from THPs splitting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) * When pinning pages for DMA there could be a concurrent write protect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) * from fork() via copy_page_range(); in that case, always fail fast GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) if (gup_flags & FOLL_PIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) unpin_user_pages(pages, nr_pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) }
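/*
 * A hypothetical timeline of the race that the seqcount check above closes
 * (the CPU numbering and exact interleaving are illustrative only):
 *
 *	CPU0: fork()/copy_page_range()      CPU1: fast GUP with FOLL_PIN
 *	------------------------------      ----------------------------
 *	                                    read write_protect_seq (even)
 *	begin write_protect_seq write
 *	write-protect parent PTEs for COW
 *	                                    lockless walk pins the old page
 *	end write_protect_seq write
 *	                                    read_seqcount_retry() sees the
 *	                                    change: unpin everything, return 0
 *
 * Returning 0 makes the caller fall back to the slow path (unless
 * FOLL_FAST_ONLY was requested).
 */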
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) static int internal_get_user_pages_fast(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) unsigned int gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) unsigned long len, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) unsigned long nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) FOLL_FORCE | FOLL_PIN | FOLL_GET |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) FOLL_FAST_ONLY)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) if (gup_flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) atomic_set(&current->mm->has_pinned, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) if (!(gup_flags & FOLL_FAST_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) might_lock_read(&current->mm->mmap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) start = untagged_addr(start) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) len = nr_pages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) if (check_add_overflow(start, len, &end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) if (unlikely(!access_ok((void __user *)start, len)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) /* Slow path: try to get the remaining pages with get_user_pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) start += nr_pinned << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) pages += nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) * The caller has to unpin the pages we already pinned, so returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) * a bare -errno (discarding nr_pinned) is not an option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) if (nr_pinned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) return ret + nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) }
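/*
 * A worked example of how the fast-path and slow-path results above combine
 * (the numbers are arbitrary). With nr_pages == 8:
 *
 *	fast path pins 8                    -> return 8, slow path skipped
 *	fast path pins 5, slow path gets 3  -> return 5 + 3 = 8
 *	fast path pins 5, slow path -EFAULT -> return 5, so the caller still
 *	                                       learns about (and can release)
 *	                                       the pages already pinned
 *	fast path pins 0, slow path -EFAULT -> return -EFAULT
 *	FOLL_FAST_ONLY, fast path pins 5    -> return 5, slow path never run
 */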
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) * get_user_pages_fast_only() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) * Should be at least nr_pages long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * the regular GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) * Note a difference from get_user_pages_fast(): this one always returns the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) * number of pages pinned, or 0 if no pages were pinned; never a negative errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) * If the architecture does not support this function, simply return with no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) * pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) * Careful, careful! COW breaking can go either way, so a non-write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) * access can get ambiguous page results. If you call this function without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) * 'write' set, you'd better be sure that you're ok with that ambiguity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) int get_user_pages_fast_only(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) int nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) * Internally (within mm/gup.c), gup fast variants must set FOLL_GET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) * because gup fast is always a "pin with a +1 page refcount" request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) * FOLL_FAST_ONLY is required in order to match the API description of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * this routine: no fall back to regular ("slow") GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) * As specified in the API description above, this routine is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) * allowed to return negative values. However, the common core
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) * routine internal_get_user_pages_fast() *can* return -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) * Therefore, correct for that here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) if (nr_pinned < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) nr_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
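/*
 * A minimal usage sketch; the single-page request, the FOLL_WRITE flag and
 * the fallback strategy are assumptions. This variant suits callers that
 * must not sleep and can retry from a blocking context:
 *
 *	struct page *page;
 *
 *	if (get_user_pages_fast_only(addr, 1, FOLL_WRITE, &page) == 1) {
 *		// Got the page without sleeping or taking mmap_lock.
 *		...
 *		put_page(page);		// drop the FOLL_GET reference
 *	} else {
 *		// Nothing pinned; retry from a context that may sleep.
 *	}
 */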
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) * get_user_pages_fast() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) * Should be at least nr_pages long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) * Attempt to pin user pages in memory without taking mm->mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) * If not successful, it will fall back to taking the lock and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) * calling get_user_pages().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) * Returns number of pages pinned. This may be fewer than the number requested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) * -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) int get_user_pages_fast(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) if (!is_valid_gup_flags(gup_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) * The caller may or may not have explicitly set FOLL_GET; either way is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) * OK. However, internally (within mm/gup.c), gup fast variants must set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) * FOLL_GET, because gup fast is always a "pin with a +1 page refcount"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) * request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) gup_flags |= FOLL_GET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) EXPORT_SYMBOL_GPL(get_user_pages_fast);
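/*
 * An illustrative sketch only; the four-page request, the flag and the error
 * handling are assumptions. Pages returned here carry normal FOLL_GET
 * references and are dropped with put_page():
 *
 *	struct page *pages[4];
 *	int i, nr;
 *
 *	nr = get_user_pages_fast(uaddr, 4, FOLL_WRITE, pages);
 *	if (nr < 0)
 *		return nr;		// nothing was pinned
 *	// ... access the nr pages actually pinned (may be fewer than 4) ...
 *	for (i = 0; i < nr; i++)
 *		put_page(pages[i]);
 */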
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) * pin_user_pages_fast() - pin user pages in memory without taking locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) * Should be at least nr_pages long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) * get_user_pages_fast() for documentation on the function arguments, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) * the arguments here are identical.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) * see Documentation/core-api/pin_user_pages.rst for further details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) int pin_user_pages_fast(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) EXPORT_SYMBOL_GPL(pin_user_pages_fast);
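/*
 * A minimal sketch; the page count and the DMA use case are assumptions, and
 * FOLL_LONGTERM is shown because device DMA pins are typically long-lived.
 * FOLL_PIN pages must be released with unpin_user_page() or
 * unpin_user_pages(), never with put_page():
 *
 *	struct page *pages[16];
 *	int nr;
 *
 *	nr = pin_user_pages_fast(uaddr, 16, FOLL_WRITE | FOLL_LONGTERM, pages);
 *	if (nr < 0)
 *		return nr;		// nothing was pinned
 *	// ... program the device with the nr pinned pages ...
 *	unpin_user_pages(pages, nr);
 */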
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) * is the same, except that this one sets FOLL_PIN instead of FOLL_GET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) * The API rules are the same, too: no negative values may be returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) int pin_user_pages_fast_only(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) int nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) * FOLL_GET and FOLL_PIN are mutually exclusive. Note that the API
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) * rules require returning 0, rather than -errno:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) * FOLL_FAST_ONLY is required in order to match the API description of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) * this routine: no fall back to regular ("slow") GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) * This routine is not allowed to return negative values. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) * internal_get_user_pages_fast() *can* return -errno. Therefore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) * correct for that here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) if (nr_pinned < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) nr_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) * pin_user_pages_remote() - pin pages of a remote process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) * @locked: pointer to lock flag indicating whether lock is held and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) * subsequently whether VM_FAULT_RETRY functionality can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) * utilised. Lock must initially be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) * get_user_pages_remote() for documentation on the function arguments, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) * the arguments here are identical.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) * see Documentation/core-api/pin_user_pages.rst for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) long pin_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) pages, vmas, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) EXPORT_SYMBOL(pin_user_pages_remote);
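/*
 * An illustrative sketch of the remote calling convention; the target mm,
 * the single-page request and the flags are assumptions. The caller holds
 * the target mm's mmap_lock and passes *locked == 1; if the lock had to be
 * dropped internally, *locked is 0 on return:
 *
 *	struct page *page;
 *	int locked = 1;
 *	long ret;
 *
 *	mmap_read_lock(mm);
 *	ret = pin_user_pages_remote(mm, addr, 1, FOLL_WRITE, &page, NULL,
 *				    &locked);
 *	if (locked)
 *		mmap_read_unlock(mm);
 *	if (ret == 1) {
 *		// ... access the page ...
 *		unpin_user_page(page);
 *	}
 */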
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) * pin_user_pages() - pin user pages in memory for use by other devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) * FOLL_PIN is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) * see Documentation/core-api/pin_user_pages.rst for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) long pin_user_pages(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) struct vm_area_struct **vmas)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) return __gup_longterm_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) pages, vmas, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) EXPORT_SYMBOL(pin_user_pages);
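/*
 * A minimal sketch; the flags, page count and error handling are
 * assumptions. As with get_user_pages(), the caller is expected to hold
 * current->mm's mmap_lock across this call:
 *
 *	struct page *pages[8];
 *	long nr;
 *
 *	mmap_read_lock(current->mm);
 *	nr = pin_user_pages(uaddr, 8, FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
 *	mmap_read_unlock(current->mm);
 *	if (nr < 0)
 *		return nr;
 *	// ... hand the nr pinned pages to the device ...
 *	unpin_user_pages(pages, nr);
 */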
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) * pin_user_pages_unlocked() is the FOLL_PIN variant of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) * get_user_pages_unlocked(). Behavior is the same, except that this one sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) * FOLL_PIN and rejects FOLL_GET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) struct page **pages, unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) EXPORT_SYMBOL(pin_user_pages_unlocked);
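/*
 * Illustrative only; the two-page request and FOLL_WRITE are assumptions.
 * Unlike pin_user_pages(), the caller does not hold mmap_lock here; note
 * also that @pages precedes @gup_flags in this variant:
 *
 *	struct page *pages[2];
 *	long nr;
 *
 *	nr = pin_user_pages_unlocked(uaddr, 2, pages, FOLL_WRITE);
 *	if (nr > 0)
 *		unpin_user_pages(pages, nr);
 */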
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) * pin_user_pages_locked() is the FOLL_PIN variant of get_user_pages_locked().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) * Behavior is the same, except that this one sets FOLL_PIN and rejects
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) * FOLL_GET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) * FIXME: Current FOLL_LONGTERM behavior is incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) * vmas. As there are no users of this flag in this call, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * disallow this option for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) return __get_user_pages_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) pages, NULL, locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) EXPORT_SYMBOL(pin_user_pages_locked);