// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/mincore.c
 *
 * Copyright (C) 1994-2006 Linus Torvalds
 */

/*
 * The mincore() system call.
 */
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>

#include <linux/uaccess.h>

static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned char present;
	unsigned char *vec = walk->private;

	/*
	 * Hugepages mapped into a user process are always resident in RAM
	 * and never swapped out, but in principle the pte still needs to
	 * be checked.
	 */
	present = pte && !huge_pte_none(huge_ptep_get(pte));
	for (; addr != end; vec++, addr += PAGE_SIZE)
		*vec = present;
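	/* Leave the cursor where the next walk callback should continue. */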
	walk->private = vec;
#else
	BUG();
#endif
	return 0;
}

/*
 * Later we can get more picky about what "in core" means precisely.
 * For now, simply check to see if the page is in the page cache,
 * and is up to date; i.e. that no page-in operation would be required
 * at this time if an application were to map and access this page.
 */
static unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
{
	unsigned char present = 0;
	struct page *page;

	/*
	 * When tmpfs swaps out a page from a file, any process mapping that
	 * file will not get a swp_entry_t in its pte, but rather it is like
	 * any other file mapping (i.e. marked !present and faulted in with
	 * tmpfs's .fault). So swapped-out tmpfs mappings are tested here.
	 */
	page = find_get_incore_page(mapping, index);
	if (page) {
		present = PageUptodate(page);
		put_page(page);
	}

	return present;
}

static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
				struct vm_area_struct *vma, unsigned char *vec)
{
	unsigned long nr = (end - addr) >> PAGE_SHIFT;
	int i;

	if (vma->vm_file) {
		pgoff_t pgoff;

		pgoff = linear_page_index(vma, addr);
		for (i = 0; i < nr; i++, pgoff++)
			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
	} else {
		for (i = 0; i < nr; i++)
			vec[i] = 0;
	}
	return nr;
}

static int mincore_unmapped_range(unsigned long addr, unsigned long end,
				   __always_unused int depth,
				   struct mm_walk *walk)
{
	walk->private += __mincore_unmapped_range(addr, end,
						  walk->vma, walk->private);
	return 0;
}

static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			struct mm_walk *walk)
{
	spinlock_t *ptl;
	struct vm_area_struct *vma = walk->vma;
	pte_t *ptep;
	unsigned char *vec = walk->private;
	int nr = (end - addr) >> PAGE_SHIFT;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
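		/* The range is backed by a mapped huge pmd: all pages resident. */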
		memset(vec, 1, nr);
		spin_unlock(ptl);
		goto out;
	}

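	/*
	 * An unstable pmd (e.g. one racing with a THP collapse or split)
	 * may have no page table to walk; report the range as unmapped.
	 */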
	if (pmd_trans_unstable(pmd)) {
		__mincore_unmapped_range(addr, end, vma, vec);
		goto out;
	}

	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		pte_t pte = *ptep;

		if (pte_none(pte))
			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
						 vma, vec);
		else if (pte_present(pte))
			*vec = 1;
		else { /* pte is a swap entry */
			swp_entry_t entry = pte_to_swp_entry(pte);

			if (non_swap_entry(entry)) {
				/*
				 * migration or hwpoison entries are always
				 * uptodate
				 */
				*vec = 1;
			} else {
#ifdef CONFIG_SWAP
				*vec = mincore_page(swap_address_space(entry),
						    swp_offset(entry));
#else
				WARN_ON(1);
				*vec = 1;
#endif
			}
		}
		vec++;
	}
	pte_unmap_unlock(ptep - 1, ptl);
out:
	walk->private += nr;
	cond_resched();
	return 0;
}

static inline bool can_do_mincore(struct vm_area_struct *vma)
{
	if (vma_is_anonymous(vma))
		return true;
	if (!vma->vm_file)
		return false;
	/*
	 * Reveal pagecache information only for non-anonymous mappings that
	 * correspond to the files the calling process could (if it tried)
	 * open for writing; otherwise we'd be including shared non-exclusive
	 * mappings, which opens a side channel.
	 */
	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
}

static const struct mm_walk_ops mincore_walk_ops = {
	.pmd_entry = mincore_pte_range,
	.pte_hole = mincore_unmapped_range,
	.hugetlb_entry = mincore_hugetlb,
};

/*
 * Do a chunk of "sys_mincore()". We've already checked
 * all the arguments, we hold the mmap semaphore: we should
 * just return the amount of info we're asked for.
 */
static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
{
	struct vm_area_struct *vma;
	unsigned long end;
	int err;

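	/* Handle one vma per call: clamp the request to the vma covering addr. */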
	vma = find_vma(current->mm, addr);
	if (!vma || addr < vma->vm_start)
		return -ENOMEM;
	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
	if (!can_do_mincore(vma)) {
		unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
		memset(vec, 1, pages);
		return pages;
	}
	err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
	if (err < 0)
		return err;
	return (end - addr) >> PAGE_SHIFT;
}

/*
 * The mincore(2) system call.
 *
 * mincore() returns the memory residency status of the pages in the
 * current process's address space specified by [addr, addr + len).
 * The status is returned in a vector of bytes. The least significant
 * bit of each byte is 1 if the referenced page is in memory, otherwise
 * it is zero.
 *
 * Because the status of a page can change after mincore() checks it
 * but before it returns to the application, the returned vector may
 * contain stale information. Only locked pages are guaranteed to
 * remain in memory.
 *
 * return values:
 *  zero    - success
 *  -EFAULT - vec points to an illegal address
 *  -EINVAL - addr is not a multiple of PAGE_SIZE
 *  -ENOMEM - Addresses in the range [addr, addr + len) are
 *		invalid for the address space of this process, or
 *		specify one or more pages which are not currently
 *		mapped
 *  -EAGAIN - A kernel resource was temporarily unavailable.
 *
 * A hypothetical userspace usage sketch appears at the end of this file.
 */
SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
		unsigned char __user *, vec)
{
	long retval;
	unsigned long pages;
	unsigned char *tmp;

	start = untagged_addr(start);

	/* Check the start address: needs to be page-aligned.. */
	if (start & ~PAGE_MASK)
		return -EINVAL;

	/* ..and we need to be passed a valid user-space range */
	if (!access_ok((void __user *) start, len))
		return -ENOMEM;

	/* This also avoids any overflows on PAGE_ALIGN */
	pages = len >> PAGE_SHIFT;
	pages += (offset_in_page(len)) != 0;

	if (!access_ok(vec, pages))
		return -EFAULT;

	tmp = (void *) __get_free_page(GFP_USER);
	if (!tmp)
		return -EAGAIN;

	retval = 0;
	while (pages) {
		/*
		 * Do at most PAGE_SIZE entries per iteration, due to
		 * the temporary buffer size.
		 */
		mmap_read_lock(current->mm);
		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
		mmap_read_unlock(current->mm);

		if (retval <= 0)
			break;
		if (copy_to_user(vec, tmp, retval)) {
			retval = -EFAULT;
			break;
		}
		pages -= retval;
		vec += retval;
		start += retval << PAGE_SHIFT;
		retval = 0;
	}
	free_page((unsigned long) tmp);
	return retval;
}
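
/*
 * Userspace usage sketch (not kernel code): a minimal, hypothetical example
 * of probing residency with mincore(2). The names (map, vec, npages) are
 * illustrative only.
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		size_t page = sysconf(_SC_PAGESIZE);
 *		size_t len = 8 * page;
 *		size_t i, npages = len / page;
 *		unsigned char *map, *vec;
 *
 *		map = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *		if (map == MAP_FAILED)
 *			return 1;
 *
 *		memset(map, 0, 2 * page);	// touch the first two pages
 *
 *		vec = malloc(npages);
 *		if (!vec || mincore(map, len, vec))
 *			return 1;
 *
 *		for (i = 0; i < npages; i++)
 *			printf("page %zu: %s\n", i,
 *			       (vec[i] & 1) ? "resident" : "not resident");
 *		return 0;
 *	}
 */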