// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>
#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>
#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 */
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
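
/*
 * Example (illustrative editor's note, not part of the original file):
 * protection_map[] is indexed by the low four VM_* bits, i.e.
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED).  A MAP_PRIVATE
 * PROT_READ|PROT_WRITE mapping has VM_READ|VM_WRITE set and VM_SHARED
 * clear, so it resolves to protection_map[0x3] == __P011: per the table
 * above the pages are readable but hardware write-protected, and the
 * first write is handled by copy-on-write ("w: (copy) copy").
 */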

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}
	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

/*
 * Requires inode->i_mapping->i_mmap_rwsem
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		allow_write_access(file);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

static void __free_vma(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	vm_area_free(vma);
}

#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
void put_vma(struct vm_area_struct *vma)
{
	if (atomic_dec_and_test(&vma->vm_ref_count))
		__free_vma(vma);
}
#else
static inline void put_vma(struct vm_area_struct *vma)
{
	__free_vma(vma);
}
#endif

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	put_vma(vma);
	return next;
}

static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
		struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk, origbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	bool downgraded = false;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	origbrk = mm->brk;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the test
	 * of oldbrk with newbrk then it can escape the test and let the data
	 * segment grow beyond its set limit in the case where the limit is
	 * not page aligned -Ram Gupta
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk) {
		mm->brk = brk;
		goto success;
	}

	/*
	 * Always allow shrinking brk.
	 * __do_munmap() may downgrade mmap_lock to read.
	 */
	if (brk <= mm->brk) {
		int ret;

		/*
		 * mm->brk must be protected by write mmap_lock so update it
		 * before downgrading mmap_lock. When __do_munmap() fails,
		 * mm->brk will be restored from origbrk.
		 */
		mm->brk = brk;
		ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
		if (ret < 0) {
			mm->brk = origbrk;
			goto out;
		} else if (ret == 1) {
			downgraded = true;
		}
		goto success;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
		goto out;
	mm->brk = brk;

success:
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	if (downgraded)
		mmap_read_unlock(mm);
	else
		mmap_write_unlock(mm);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = origbrk;
	mmap_write_unlock(mm);
	return retval;
}
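
/*
 * Usage note (illustrative editor's note, not part of the original file):
 * sys_brk() never returns an error code to userspace.  On success it
 * returns the new break; on any failure path it falls through to "out"
 * and returns the current, unchanged break (origbrk).  A request below
 * min_brk, including brk(0), therefore just reports the current break,
 * and C library wrappers such as brk()/sbrk() typically detect failure
 * by comparing the returned value with the value they requested.
 */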

static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
{
	unsigned long gap, prev_end;

	/*
	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
	 * allow two stack_guard_gaps between them here, and when choosing
	 * an unmapped area; whereas when expanding we only require one.
	 * That's a little inconsistent, but keeps the code here simpler.
	 */
	gap = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (gap > prev_end)
			gap -= prev_end;
		else
			gap = 0;
	}
	return gap;
}
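
/*
 * Worked example (illustrative editor's note, not part of the original
 * file): if the previous vma's guarded end (vm_end_gap()) is
 * 0x7f0000200000 and this vma's guarded start (vm_start_gap()) is
 * 0x7f0000600000, the gap recorded for this vma is 0x400000 bytes.  If
 * the guarded ranges abut or overlap, the subtraction would underflow,
 * so the gap is clamped to zero instead.
 */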

#ifdef CONFIG_DEBUG_VM_RB
static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max = vma_compute_gap(vma), subtree_gap;
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}

static int browse_rb(struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			pr_emerg("vm_start %lx < prev %lx\n",
				  vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			pr_emerg("vm_start %lx < pend %lx\n",
				  vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			pr_emerg("vm_start %lx > vm_end %lx\n",
				  vma->vm_start, vma->vm_end);
			bug = 1;
		}
		spin_lock(&mm->page_table_lock);
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			pr_emerg("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		spin_unlock(&mm->page_table_lock);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
	if (i != j) {
		pr_emerg("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}

static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}

static void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		struct anon_vma *anon_vma = vma->anon_vma;
		struct anon_vma_chain *avc;

		if (anon_vma) {
			anon_vma_lock_read(anon_vma);
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				anon_vma_interval_tree_verify(avc);
			anon_vma_unlock_read(anon_vma);
		}

		highest_address = vm_end_gap(vma);
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
			  mm->highest_vm_end, highest_address);
		bug = 1;
	}
	i = browse_rb(mm);
	if (i != mm->map_count) {
		if (i != -1)
			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	VM_BUG_ON_MM(bug, mm);
}
#else
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif

RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
			 struct vm_area_struct, vm_rb,
			 unsigned long, rb_subtree_gap, vma_compute_gap)
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
#define mm_rb_write_lock(mm)	write_lock(&(mm)->mm_rb_lock)
#define mm_rb_write_unlock(mm)	write_unlock(&(mm)->mm_rb_lock)
#else
#define mm_rb_write_lock(mm)	do { } while (0)
#define mm_rb_write_unlock(mm)	do { } while (0)
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */

/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_prev->vm_end values changed, without modifying the vma's position
 * in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
	 * a callback function that does exactly what we want.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;

	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void __vma_rb_erase(struct vm_area_struct *vma, struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;
	/*
	 * Note rb_erase_augmented is a fairly large inline function,
	 * so make sure we instantiate it only once with our desired
	 * augmented rbtree callbacks.
	 */
	mm_rb_write_lock(mm);
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
	mm_rb_write_unlock(mm); /* wmb */

	/*
	 * Ensure the removal is complete before clearing the node.
	 * Matched by vma_has_changed()/handle_speculative_fault().
	 */
	RB_CLEAR_NODE(&vma->vm_rb);
}

static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct mm_struct *mm,
						struct vm_area_struct *ignore)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of
	 *
	 * a. the "next" vma being erased if next->vm_start was reduced in
	 *    __vma_adjust() -> __vma_unlink()
	 * b. the vma being erased in detach_vmas_to_be_unmapped() ->
	 *    vma_rb_erase()
	 */
	validate_mm_rb(&mm->mm_rb, ignore);

	__vma_rb_erase(vma, mm);
}

static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct mm_struct *mm)
{
	vma_rb_erase_ignore(vma, mm, vma);
}

/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's mutex.
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
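
/*
 * Usage sketch (illustrative editor's note, not part of the original
 * file): a caller that moves a vma's boundaries brackets the field
 * updates with the two helpers above, roughly:
 *
 *	anon_vma_lock_write(vma->anon_vma);
 *	anon_vma_interval_tree_pre_update_vma(vma);
 *	vma->vm_start = new_start;
 *	vma->vm_pgoff = new_pgoff;
 *	anon_vma_interval_tree_post_update_vma(vma);
 *	anon_vma_unlock_write(vma->anon_vma);
 *
 * all while holding the mmap_lock for writing, as the comment above
 * requires.  __vma_adjust() below follows this pattern.
 */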

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}

/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma.
 *
 * If @vma is NULL, return the first vma in the mm.
 *
 * Returns: The next VMA after @vma.
 */
static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
					      struct vm_area_struct *vma)
{
	if (!vma)
		return mm->mmap;

	return vma->vm_next;
}

/*
 * munmap_vma_range() - munmap VMAs that overlap a range.
 * @mm: The mm struct
 * @start: The start of the range.
 * @len: The length of the range.
 * @pprev: pointer to the pointer that will be set to previous vm_area_struct
 * @rb_link: the rb_node
 * @rb_parent: the parent rb_node
 *
 * Find all the vm_area_struct that overlap from @start to
 * @start + @len and munmap them.  Set @pprev to the previous vm_area_struct.
 *
 * Returns: -ENOMEM on munmap failure or 0 on success.
 */
static inline int
munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
		 struct vm_area_struct **pprev, struct rb_node ***link,
		 struct rb_node **parent, struct list_head *uf)
{

	while (find_vma_links(mm, start, start + len, pprev, link, parent))
		if (do_munmap(mm, start, len, uf))
			return -ENOMEM;

	return 0;
}
static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* Find first overlapping mapping */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* Iterate over the rest of the overlaps */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree to find the
	 * correct insertion point for that vma. As a result, we could not
	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
	 * So, we first insert the vma with a zero rb_subtree_gap value
	 * (to be consistent with what we did on the way down), and then
	 * immediately update the gap to the correct value. Finally we
	 * rebalance the rbtree after all augmented values have been set.
	 */
	mm_rb_write_lock(mm);
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, mm);
	mm_rb_write_unlock(mm);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			put_write_access(file_inode(file));
		if (vma->vm_flags & VM_SHARED)
			mapping_allow_writable(mapping);

		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		i_mmap_lock_write(mapping);
	}

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	if (mapping)
		i_mmap_unlock_write(mapping);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
 * mm's list and rbtree.  It has already been inserted into the interval tree.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		BUG();
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static __always_inline void __vma_unlink(struct mm_struct *mm,
						struct vm_area_struct *vma,
						struct vm_area_struct *ignore)
{
	vma_rb_erase_ignore(vma, mm, ignore);
	__vma_unlink_list(mm, vma);
	/* Kill the cache */
	vmacache_invalidate(mm);
}

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
	struct vm_area_struct *expand, bool keep_locked)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
	struct address_space *mapping = NULL;
	struct rb_root_cached *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	bool start_changed = false, end_changed = false;
	long adjust_next = 0;
	int remove_next = 0;

	vm_write_begin(vma);
	if (next)
		vm_write_begin(next);

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (next && !insert) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) struct vm_area_struct *exporter = NULL, *importer = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) if (end >= next->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) * vma expands, overlapping all the next, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) * perhaps the one after too (mprotect case 6).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) * The only other cases that get here are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) * case 1, case 7 and case 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) if (next == expand) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * The only case where we don't expand "vma"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) * and we expand "next" instead is case 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) VM_WARN_ON(end != next->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) * remove_next == 3 means we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * removing "vma" and that to do so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) * swapped "vma" and "next".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) remove_next = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) VM_WARN_ON(file != next->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) swap(vma, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) VM_WARN_ON(expand != vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) * case 1, 6, 7, remove_next == 2 is case 6,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) * remove_next == 1 is case 1 or 7.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) remove_next = 1 + (end > next->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) VM_WARN_ON(remove_next == 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) end != next->vm_next->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) /* trim end to next, for case 6 first pass */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) end = next->vm_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) exporter = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) importer = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) * If next doesn't have an anon_vma, import one from the vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) * after next, if the expanding vma overlaps with it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) if (remove_next == 2 && !next->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) exporter = next->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) } else if (end > next->vm_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * vma expands, overlapping part of the next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * mprotect case 5 shifting the boundary up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) adjust_next = (end - next->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) exporter = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) importer = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) VM_WARN_ON(expand != importer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) } else if (end < vma->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * vma shrinks, and !insert tells us it's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) * split_vma inserting another: so it must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) * mprotect case 4 shifting the boundary down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) adjust_next = -(vma->vm_end - end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) exporter = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) importer = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) VM_WARN_ON(expand != importer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * Easily overlooked: when mprotect shifts the boundary,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * make sure the expanding vma has anon_vma set if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) * shrinking vma had one, to cover any anon pages imported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (exporter && exporter->anon_vma && !importer->anon_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) importer->anon_vma = exporter->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) error = anon_vma_clone(importer, exporter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) if (next && next != vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) vm_write_end(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) vm_write_end(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) root = &mapping->i_mmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) uprobe_munmap(vma, vma->vm_start, vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) uprobe_munmap(next, next->vm_start, next->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) i_mmap_lock_write(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (insert) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * Put into interval tree now, so instantiated pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) * are visible to arm/parisc __flush_dcache_page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) * throughout; but we cannot insert into address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) * space until vma start or end is updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) __vma_link_file(insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (!anon_vma && adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) anon_vma = next->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (anon_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) VM_WARN_ON(adjust_next && next->anon_vma &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) anon_vma != next->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) anon_vma_lock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) anon_vma_interval_tree_pre_update_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) anon_vma_interval_tree_pre_update_vma(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) flush_dcache_mmap_lock(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) vma_interval_tree_remove(vma, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) if (adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) vma_interval_tree_remove(next, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (start != vma->vm_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) WRITE_ONCE(vma->vm_start, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) start_changed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) if (end != vma->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) WRITE_ONCE(vma->vm_end, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) end_changed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) WRITE_ONCE(vma->vm_pgoff, pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (adjust_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) WRITE_ONCE(next->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) next->vm_start + adjust_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) WRITE_ONCE(next->vm_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) next->vm_pgoff + (adjust_next >> PAGE_SHIFT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) }
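/*
 * For illustration only (assuming 4 KiB pages): in mprotect case 5,
 * adjust_next == end - next->vm_start is positive, so moving
 * next->vm_start up by e.g. 0x2000 bytes must also advance
 * next->vm_pgoff by 0x2000 >> PAGE_SHIFT == 2 pages, keeping the file
 * offset of the first remaining page of "next" correct. In case 4,
 * adjust_next is negative and both fields move back by the same amounts.
 */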
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) if (adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) vma_interval_tree_insert(next, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) vma_interval_tree_insert(vma, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) flush_dcache_mmap_unlock(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) if (remove_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * vma_merge has merged next into vma, and needs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * us to remove next before dropping the locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) if (remove_next != 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) __vma_unlink(mm, next, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * vma is not before next if they've been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * swapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * pre-swap() next->vm_start was reduced so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * tell validate_mm_rb to ignore pre-swap()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * "next" (which is stored in post-swap()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * "vma").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) __vma_unlink(mm, next, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) __remove_shared_vm_struct(next, file, mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) } else if (insert) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * split_vma has split insert from vma, and needs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * us to insert it before dropping the locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * (it may either follow vma or precede it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) __insert_vm_struct(mm, insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (start_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) vma_gap_update(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) if (end_changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) if (!next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) mm->highest_vm_end = vm_end_gap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) else if (!adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) vma_gap_update(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (anon_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) anon_vma_interval_tree_post_update_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) anon_vma_interval_tree_post_update_vma(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) anon_vma_unlock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) i_mmap_unlock_write(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) uprobe_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) if (adjust_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) uprobe_mmap(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (remove_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) uprobe_munmap(next, next->vm_start, next->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) if (next->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) anon_vma_merge(vma, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) mm->map_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) vm_write_end(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) put_vma(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) * In mprotect's case 6 (see comments on vma_merge),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) * we must remove another next too. It would clutter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * up the code too much to do both in one go.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) if (remove_next != 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * If "next" was removed and vma->vm_end was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * expanded (up) over it, in turn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * "next->vm_prev->vm_end" changed and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * "vma->vm_next" gap must be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) next = vma->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) vm_write_begin(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * For the scope of the comment "next" and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * "vma" considered pre-swap(): if "vma" was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * removed, next->vm_start was expanded (down)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * over it and the "next" gap must be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * Because of the swap() the post-swap() "vma"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * actually points to pre-swap() "next"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * (post-swap() "next" as opposed is now a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * dangling pointer).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) next = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (remove_next == 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) remove_next = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) end = next->vm_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) else if (next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) vma_gap_update(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * If remove_next == 2 we obviously can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * reach this path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * If remove_next == 3 we can't reach this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * path because pre-swap() next is never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * NULL. pre-swap() "next" is not being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * removed and its next->vm_end is not altered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * (and furthermore "end" already matches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * next->vm_end in remove_next == 3).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) * We reach this only in the remove_next == 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) * case if the "next" vma that was removed was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) * the highest vma of the mm. However in such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) * case next->vm_end == "end" and the extended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) * "vma" has vma->vm_end == next->vm_end so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) * mm->highest_vm_end doesn't need any update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) * in remove_next == 1 case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (insert && file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) uprobe_mmap(insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (next && next != vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) vm_write_end(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (!keep_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) vm_write_end(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) validate_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * If the vma has a ->close operation then the driver probably needs to release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * per-vma resources, so we don't attempt to merge those.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) static inline int is_mergeable_vma(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) struct file *file, unsigned long vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) const char __user *anon_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * VM_SOFTDIRTY should not prevent VMA merging: if the flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * match except for the dirty bit, the caller should mark the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * merged VMA as dirty. If the dirty bit were not excluded from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * the comparison, we would increase pressure on the memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * system, forcing the kernel to generate new VMAs when old ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * could be extended instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) if (vma->vm_file != file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) if (vma->vm_ops && vma->vm_ops->close)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) if (vma_get_anon_name(vma) != anon_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) }
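/*
 * For illustration only: the XOR test above treats VM_SOFTDIRTY as
 * "don't care". For example, a vma with VM_READ | VM_WRITE | VM_SOFTDIRTY
 * still matches a request for VM_READ | VM_WRITE, because the XOR of the
 * two flag sets is exactly VM_SOFTDIRTY and masking it with ~VM_SOFTDIRTY
 * yields zero; any other differing bit leaves the result non-zero and the
 * merge is refused.
 */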
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) struct anon_vma *anon_vma2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) * The list_is_singular() test avoids merging VMAs cloned from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) * parents; this improves scalability by reducing anon_vma lock contention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if ((!anon_vma1 || !anon_vma2) && (!vma ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) list_is_singular(&vma->anon_vma_chain)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) return anon_vma1 == anon_vma2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) }
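/*
 * For illustration only: a freshly created anonymous area may still have
 * anon_vma == NULL, so one of anon_vma1/anon_vma2 being NULL is treated
 * as compatible provided the vma under consideration has a singleton
 * anon_vma_chain (i.e. it did not inherit extra anon_vmas across a fork),
 * or no vma is passed at all, as in the prev/next cross-check done by
 * __vma_merge. Otherwise both sides must point to the same anon_vma.
 */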
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) * in front of (at a lower virtual address and file offset than) the vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * We cannot merge two vmas if they have differently assigned (non-NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * We don't check here for the merged mmap wrapping around the end of pagecache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * indices (16TB on ia32) because do_mmap() does not permit mmap's which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * wrap, nor mmaps which cover the final page at index -1UL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) struct anon_vma *anon_vma, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) pgoff_t vm_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) const char __user *anon_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) if (vma->vm_pgoff == vm_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * beyond (at a higher virtual address and file offset than) the vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * We cannot merge two vmas if they have differently assigned (non-NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) struct anon_vma *anon_vma, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) pgoff_t vm_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) const char __user *anon_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) pgoff_t vm_pglen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) vm_pglen = vma_pages(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (vma->vm_pgoff + vm_pglen == vm_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) }
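/*
 * For illustration only (hypothetical numbers, assuming 4 KiB pages):
 * a file-backed vma spanning [0x1000, 0x3000) with vm_pgoff == 10 has
 * vma_pages() == 2, so a new request starting at 0x3000 can merge after
 * it only if its pgoff is 10 + 2 == 12, i.e. the file offsets stay
 * contiguous across the merged range.
 */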
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) * figure out whether that can be merged with its predecessor or its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) * successor. Or both (it neatly fills a hole).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) * In most cases - when called for mmap, brk or mremap - [addr,end) is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) * certain not to be mapped by the time vma_merge is called; but when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) * called for mprotect, it is certain to be already mapped (either at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) * an offset within prev, or at the start of next), and the flags of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) * this area are about to be changed to vm_flags - and the no-change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * case has already been eliminated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * The following mprotect cases have to be considered, where AAAA is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * the area passed down from mprotect_fixup, never extending beyond one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) * AAAA AAAA AAAA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) * PPPPPPNNNNNN PPPPPPNNNNNN PPPPPPNNNNNN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) * cannot merge might become might become
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * PPNNNNNNNNNN PPPPPPPPPPNN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * mmap, brk or case 4 below case 5 below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * mremap move:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) * AAAA AAAA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) * PPPP NNNN PPPPNNNNXXXX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) * might become might become
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) * PPPPPPPPPPPP 1 or PPPPPPPPPPPP 6 or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * PPPPPPPPNNNN 2 or PPPPPPPPXXXX 7 or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) * PPPPNNNNNNNN 3 PPPPXXXXXXXX 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * It is important for case 8 that the vma NNNN overlapping the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * region AAAA is never going to be extended over XXXX. Instead XXXX must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * be extended in region AAAA and NNNN must be removed. This way in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * all cases where vma_merge succeeds, the moment vma_adjust drops the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * rmap_locks, the properties of the merged vma will be already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) * correct for the whole merged range. Some of those properties like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) * vm_page_prot/vm_flags may be accessed by rmap_walks and they must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) * be correct for the whole merged range immediately after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * rmap_locks are released. Otherwise if XXXX would be removed and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * NNNN would be extended over the XXXX range, remove_migration_ptes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * or other rmap walkers (if working on addresses beyond the "end"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * parameter) may establish ptes with the wrong permissions of NNNN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * instead of the right permissions of XXXX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) struct vm_area_struct *__vma_merge(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) struct vm_area_struct *prev, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) unsigned long end, unsigned long vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) struct anon_vma *anon_vma, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) pgoff_t pgoff, struct mempolicy *policy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) const char __user *anon_name, bool keep_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) struct vm_area_struct *area, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * We later require that vma->vm_flags == vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * so this tests vma->vm_flags & VM_SPECIAL, too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) if (vm_flags & VM_SPECIAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) next = vma_next(mm, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) area = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) if (area && area->vm_end == end) /* cases 6, 7, 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) next = next->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) /* verify some invariants that must be enforced by the caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) VM_WARN_ON(prev && addr <= prev->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) VM_WARN_ON(area && end > area->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) VM_WARN_ON(addr >= end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) * Can it merge with the predecessor?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) if (prev && prev->vm_end == addr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) mpol_equal(vma_policy(prev), policy) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) can_vma_merge_after(prev, vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) anon_vma, file, pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) anon_name)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * OK, it can. Can we now merge in the successor as well?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) if (next && end == next->vm_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) mpol_equal(policy, vma_policy(next)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) can_vma_merge_before(next, vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) anon_vma, file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) pgoff+pglen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) anon_name) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) is_mergeable_anon_vma(prev->anon_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) next->anon_vma, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) /* cases 1, 6 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) err = __vma_adjust(prev, prev->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) next->vm_end, prev->vm_pgoff, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) prev, keep_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) } else /* cases 2, 5, 7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) err = __vma_adjust(prev, prev->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) end, prev->vm_pgoff, NULL, prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) keep_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) khugepaged_enter_vma_merge(prev, vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) return prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) * Can this new request be merged in front of next?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (next && end == next->vm_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) mpol_equal(policy, vma_policy(next)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) can_vma_merge_before(next, vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) anon_vma, file, pgoff+pglen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) anon_name)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) if (prev && addr < prev->vm_end) /* case 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) err = __vma_adjust(prev, prev->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) addr, prev->vm_pgoff, NULL, next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) keep_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) else { /* cases 3, 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) err = __vma_adjust(area, addr, next->vm_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) next->vm_pgoff - pglen, NULL, next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) keep_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * In case 3 area is already equal to next and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * this is a noop, but in case 8 "area" has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) * been removed and next was expanded over it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) area = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) khugepaged_enter_vma_merge(area, vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) return area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) * Rough compatibility check to quickly see if it's even worth looking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) * at sharing an anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) * They need to have the same vm_file, and the flags can only differ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) * in things that mprotect may change.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) * we can merge the two vma's. For example, we refuse to merge a vma if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) * there is a vm_ops->close() function, because that indicates that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) * driver is doing some kind of reference counting. But that doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) * really matter for the anon_vma sharing case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) return a->vm_end == b->vm_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) mpol_equal(vma_policy(a), vma_policy(b)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) a->vm_file == b->vm_file &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * Do some basic sanity checking to see if we can re-use the anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) * the same as 'old', the other will be the new one that is trying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) * to share the anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) * NOTE! This runs with mm_sem held for reading, so it is possible that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * the anon_vma of 'old' is concurrently in the process of being set up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * by another page fault trying to merge _that_. But that's ok: if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * is being set up, that automatically means that it will be a singleton
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) * acceptable for merging, so we can do all of this optimistically. But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * we do that READ_ONCE() to make sure that we never re-load the pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) * IOW: that the "list_is_singular()" test on the anon_vma_chain only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) * matters for the 'stable anon_vma' case (ie the thing we want to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) * is to return an anon_vma that is "complex" due to having gone through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) * a fork).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) * We also make sure that the two vma's are compatible (adjacent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) * and with the same memory policies). That's all stable, even with just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) * a read lock on the mm_sem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (anon_vma_compatible(a, b)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) if (anon_vma && list_is_singular(&old->anon_vma_chain))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) * find_mergeable_anon_vma is used by anon_vma_prepare, to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) * neighbouring vmas for a suitable anon_vma, before it goes off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) * to allocate a new anon_vma. It checks because a repetitive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) * sequence of mprotects and faults may otherwise lead to distinct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) * anon_vmas being allocated, preventing vma merge in subsequent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) * mprotect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) /* Try next first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) if (vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) /* Then try prev. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (vma->vm_prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) * We might reach here with anon_vma == NULL if we can't find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) * any reusable anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * There's no absolute need to look only at touching neighbours:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * we could search further afield for "compatible" anon_vmas.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * But it would probably just be a waste of time searching,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * or lead to too many vmas hanging off the same anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * We're trying to allow mprotect remerging later on,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) * not trying to minimize memory used for anon_vmas.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) * If a hint addr is less than mmap_min_addr, change the hint to be as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) * low as possible but still greater than mmap_min_addr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static inline unsigned long round_hint_to_min(unsigned long hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) hint &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if (((void *)hint != NULL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) (hint < mmap_min_addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) return PAGE_ALIGN(mmap_min_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) return hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) }
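/*
 * For illustration only (hypothetical values, assuming 4 KiB pages):
 * with mmap_min_addr == 0x10000, a hint of 0x1234 is first page-masked
 * to 0x1000 and, being below mmap_min_addr, is bumped to
 * PAGE_ALIGN(0x10000) == 0x10000. A NULL hint is left untouched so the
 * kernel remains free to pick an address itself.
 */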
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) static inline int mlock_future_check(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) unsigned long flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) unsigned long len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) unsigned long locked, lock_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) /* mlock MCL_FUTURE? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) if (flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) locked = len >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) locked += mm->locked_vm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) lock_limit = rlimit(RLIMIT_MEMLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) lock_limit >>= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) if (locked > lock_limit && !capable(CAP_IPC_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) }
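/*
 * For illustration only (hypothetical numbers, assuming 4 KiB pages):
 * with RLIMIT_MEMLOCK at 64 KiB (16 pages) and mm->locked_vm already at
 * 10 pages, a VM_LOCKED request of len == 32 KiB adds 8 pages; since
 * 10 + 8 > 16 the request fails with -EAGAIN unless the task has
 * CAP_IPC_LOCK.
 */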
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) if (S_ISREG(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) return MAX_LFS_FILESIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) if (S_ISBLK(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) return MAX_LFS_FILESIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) if (S_ISSOCK(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) return MAX_LFS_FILESIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) /* Special "we do even unsigned file positions" case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) if (file->f_mode & FMODE_UNSIGNED_OFFSET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) /* Yes, random drivers might want more. But I'm tired of buggy drivers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) return ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) static inline bool file_mmap_ok(struct file *file, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) unsigned long pgoff, unsigned long len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) u64 maxsize = file_mmap_size_max(file, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) if (maxsize && len > maxsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) maxsize -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) if (pgoff > maxsize >> PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) }
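/*
 * For illustration only: when maxsize is non-zero, the two checks above
 * bound the mapping without risking overflow. len is validated first,
 * then pgoff is compared against the remaining (maxsize - len) expressed
 * in pages, so (pgoff << PAGE_SHIFT) + len cannot run past maxsize even
 * for values near the top of the range. A zero maxsize (the
 * FMODE_UNSIGNED_OFFSET case) effectively lifts the limit.
 */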
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) * The caller must write-lock current->mm->mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) unsigned long do_mmap(struct file *file, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) unsigned long len, unsigned long prot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) unsigned long flags, unsigned long pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) unsigned long *populate, struct list_head *uf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) vm_flags_t vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) int pkey = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) *populate = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * Does the application expect PROT_READ to imply PROT_EXEC?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) * (the exception is when the underlying filesystem is noexec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) * mounted, in which case we don't add PROT_EXEC.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) if (!(file && path_noexec(&file->f_path)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) prot |= PROT_EXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) /* force arch specific MAP_FIXED handling in get_unmapped_area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) if (flags & MAP_FIXED_NOREPLACE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) flags |= MAP_FIXED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) if (!(flags & MAP_FIXED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) addr = round_hint_to_min(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) /* Careful about overflows.. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) len = PAGE_ALIGN(len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) /* offset overflow? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) /* Too many mappings? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (mm->map_count > sysctl_max_map_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) /* Obtain the address to map to. We verify (or select) it and ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * that it represents a valid section of the address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) addr = get_unmapped_area(file, addr, len, pgoff, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) if (IS_ERR_VALUE(addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) if (flags & MAP_FIXED_NOREPLACE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) struct vm_area_struct *vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) if (vma && vma->vm_start < addr + len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) if (prot == PROT_EXEC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) pkey = execute_only_pkey(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) if (pkey < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) pkey = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) /* Do simple checking here so the lower-level routines won't have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * to. We assume access permissions have been handled by the open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * of the memory object, so we don't do any here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) if (flags & MAP_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) if (!can_do_mlock())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) if (mlock_future_check(mm, vm_flags, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) unsigned long flags_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) if (!file_mmap_ok(file, inode, pgoff, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) switch (flags & MAP_TYPE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) case MAP_SHARED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) * Force use of MAP_SHARED_VALIDATE with non-legacy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) * flags. E.g. MAP_SYNC is dangerous to use with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) * MAP_SHARED as you don't know which consistency model
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) * you will get. We silently ignore unsupported flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) * with MAP_SHARED to preserve backward compatibility.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) flags &= LEGACY_MAP_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) case MAP_SHARED_VALIDATE:
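/*
 * Illustrative userspace call (fd is hypothetical): e.g.
 *   mmap(NULL, len, PROT_READ | PROT_WRITE,
 *        MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0)
 * is rejected with -EOPNOTSUPP below unless the file's
 * ->mmap_supported_flags includes MAP_SYNC.
 */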
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) if (flags & ~flags_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) if (prot & PROT_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) if (!(file->f_mode & FMODE_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) if (IS_SWAPFILE(file->f_mapping->host))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) return -ETXTBSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) * Make sure we don't allow writing to an append-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) * file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) * Make sure there are no mandatory locks on the file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) if (locks_verify_locked(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) vm_flags |= VM_SHARED | VM_MAYSHARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (!(file->f_mode & FMODE_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) case MAP_PRIVATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) if (!(file->f_mode & FMODE_READ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) if (path_noexec(&file->f_path)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) if (vm_flags & VM_EXEC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) vm_flags &= ~VM_MAYEXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (!file->f_op->mmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) switch (flags & MAP_TYPE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) case MAP_SHARED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * Ignore pgoff.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) pgoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) vm_flags |= VM_SHARED | VM_MAYSHARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) case MAP_PRIVATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * Set pgoff according to addr for anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) pgoff = addr >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * Set 'VM_NORESERVE' if we should not account for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) * memory use of this mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (flags & MAP_NORESERVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) /* We honor MAP_NORESERVE if allowed to overcommit */
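/* (i.e. unless sysctl vm.overcommit_memory is set to 2, OVERCOMMIT_NEVER) */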
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) vm_flags |= VM_NORESERVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) /* hugetlb applies strict overcommit unless MAP_NORESERVE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) if (file && is_file_hugepages(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) vm_flags |= VM_NORESERVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
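/*
 * Ask the caller to populate (pre-fault) the whole range when the mapping
 * is mlocked, or when MAP_POPULATE was passed without MAP_NONBLOCK.
 */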
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) if (!IS_ERR_VALUE(addr) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) ((vm_flags & VM_LOCKED) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) *populate = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
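/*
 * Common entry point for the mmap_pgoff/old_mmap syscalls: resolve the fd
 * to a struct file (or set up an anonymous hugetlbfs file for MAP_HUGETLB),
 * align hugetlb lengths, and hand the request to vm_mmap_pgoff().
 */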
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) unsigned long prot, unsigned long flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) unsigned long fd, unsigned long pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) struct file *file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) unsigned long retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) if (!(flags & MAP_ANONYMOUS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) audit_mmap_fd(fd, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) file = fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) if (is_file_hugepages(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) len = ALIGN(len, huge_page_size(hstate_file(file)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) } else if (unlikely(flags & MAP_HUGETLB)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) retval = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) } else if (flags & MAP_HUGETLB) {
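/*
 * Anonymous MAP_HUGETLB: back the mapping with an unlinked hugetlbfs
 * file so the rest of the mmap path can treat it as file-backed. The
 * huge page size is encoded in the flag bits covered by MAP_HUGE_MASK,
 * e.g. (21 << MAP_HUGE_SHIFT) selects 2 MiB pages (MAP_HUGE_2MB).
 */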
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) struct user_struct *user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) struct hstate *hs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) if (!hs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) len = ALIGN(len, huge_page_size(hs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) * VM_NORESERVE is used because the reservations will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) * taken when vm_ops->mmap() is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) * A dummy user value is used because we are not locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) * memory, so no accounting is necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) VM_NORESERVE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) &user, HUGETLB_ANONHUGE_INODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) if (IS_ERR(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) return PTR_ERR(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) unsigned long, prot, unsigned long, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) unsigned long, fd, unsigned long, pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)
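/*
 * Legacy mmap() entry point for architectures that pass the arguments in a
 * user-space struct and use a byte offset; the offset must be page aligned
 * and is converted to a page offset before calling ksys_mmap_pgoff().
 */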
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) #ifdef __ARCH_WANT_SYS_OLD_MMAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) struct mmap_arg_struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) unsigned long len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) unsigned long prot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) unsigned long fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) unsigned long offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) struct mmap_arg_struct a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) if (copy_from_user(&a, arg, sizeof(a)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) if (offset_in_page(a.offset))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) a.offset >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) #endif /* __ARCH_WANT_SYS_OLD_MMAP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * Some shared mappings will want the pages marked read-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * to track write events. If so, we'll downgrade vm_page_prot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * to the private version (using protection_map[] without the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * VM_SHARED bit).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) vm_flags_t vm_flags = vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) const struct vm_operations_struct *vm_ops = vma->vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) /* If it was private or non-writable, the write bit is already clear */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) /* The backer wishes to know when pages are first written to? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) /* The open routine did something to the protections that pgprot_modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) * won't preserve? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) if (pgprot_val(vm_page_prot) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) /* Do we need to track softdirty? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) /* Specialty mapping? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) if (vm_flags & VM_PFNMAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) /* Can the mapping track the dirty pages? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) return vma->vm_file && vma->vm_file->f_mapping &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) mapping_can_writeback(vma->vm_file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) * We account for memory if it's a private writable mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) * not hugepages, and VM_NORESERVE wasn't set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * hugetlb has its own accounting separate from the core VM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * VM_HUGETLB may not be set yet, so we cannot check for that flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) if (file && is_file_hugepages(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788)
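/*
 * Accountable means VM_WRITE is set while VM_NORESERVE and VM_SHARED are
 * both clear, i.e. a private writable mapping.
 */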
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)
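/*
 * mmap_region(): the back half of do_mmap(). With vm_flags already
 * validated, check address-space limits, unmap anything overlapping the
 * range, charge private writable memory, then either merge into an
 * existing vma or allocate, populate and link a new one. Returns the
 * mapped address or a negative error value.
 */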
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) unsigned long mmap_region(struct file *file, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) struct list_head *uf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) struct vm_area_struct *vma, *prev, *merge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) struct rb_node **rb_link, *rb_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) unsigned long charged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) /* Check against address space limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) unsigned long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) * MAP_FIXED may remove pages of mappings that intersect with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) * requested mapping. Account for the pages it would unmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) nr_pages = count_vma_pages_range(mm, addr, addr + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) if (!may_expand_vm(mm, vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) (len >> PAGE_SHIFT) - nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) * Private writable mapping: check memory availability
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) if (accountable_mapping(file, vm_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) charged = len >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) if (security_vm_enough_memory_mm(mm, charged))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) vm_flags |= VM_ACCOUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) * Can we just expand an old mapping?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) if (vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) * Determine the object being mapped and call the appropriate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * specific mapper. The address has already been validated, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * not unmapped; the old maps have been removed from the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) vma = vm_area_alloc(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) if (!vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) goto unacct_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) vma->vm_start = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) vma->vm_end = addr + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) vma->vm_flags = vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) vma->vm_page_prot = vm_get_page_prot(vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) vma->vm_pgoff = pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) if (vm_flags & VM_DENYWRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) error = deny_write_access(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) goto free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) if (vm_flags & VM_SHARED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) error = mapping_map_writable(file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) goto allow_write_and_free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) /* ->mmap() can change vma->vm_file, but must guarantee that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) * vma_link() below can deny write-access if VM_DENYWRITE is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) * and map writably if VM_SHARED is set. This usually means the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * new file must not have been exposed to user-space, yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) vma->vm_file = get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) error = call_mmap(file, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) goto unmap_and_free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) /* Can addr have changed??
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) * Answer: Yes, several device drivers can do it in their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) * f_op->mmap method. -DaveM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * Bug: If addr is changed, prev, rb_link, rb_parent should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) * be updated for vma_link().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) WARN_ON_ONCE(addr != vma->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) addr = vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) /* If vm_flags changed after call_mmap(), we should try to merge the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * vma again, as we may succeed this time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (unlikely(vm_flags != vma->vm_flags && prev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) vma_get_anon_name(vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) if (merge) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) /* ->mmap() can change vma->vm_file and fput the original file. So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) * fput vma->vm_file here, or we would end up with an extra fput on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) * original file and ultimately cause a general protection fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) fput(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) vm_area_free(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) vma = merge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) /* Update vm_flags to pick up the change. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) vm_flags = vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) goto unmap_writable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) vm_flags = vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) } else if (vm_flags & VM_SHARED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) error = shmem_zero_setup(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) goto free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) vma_set_anonymous(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) /* Allow architectures to sanity-check the vm_flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) if (!arch_validate_flags(vma->vm_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) goto unmap_and_free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) goto free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) vma_link(mm, vma, prev, rb_link, rb_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) /* Once vma denies write, undo our temporary denial count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) unmap_writable:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) if (vm_flags & VM_SHARED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) mapping_unmap_writable(file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) if (vm_flags & VM_DENYWRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) allow_write_access(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) file = vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) perf_event_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) vm_write_begin(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) if (vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) is_vm_hugetlb_page(vma) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) vma == get_gate_vma(current->mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) WRITE_ONCE(vma->vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) vma->vm_flags & VM_LOCKED_CLEAR_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) mm->locked_vm += (len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) uprobe_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) * A new (or expanded) vma always gets soft-dirty status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) * Otherwise the user-space soft-dirty page tracker would not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) * be able to distinguish the case where a vma area is unmapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) * and then a new one is mapped in place (which must be treated as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) * a completely new data area).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) WRITE_ONCE(vma->vm_flags, vma->vm_flags | VM_SOFTDIRTY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) vma_set_page_prot(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) vm_write_end(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) trace_android_vh_mmap_region(vma, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) unmap_and_free_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) vma->vm_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) /* Undo any partial mapping done by a device driver. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) charged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) if (vm_flags & VM_SHARED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) mapping_unmap_writable(file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) allow_write_and_free_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) if (vm_flags & VM_DENYWRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) allow_write_access(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) free_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) vm_area_free(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) unacct_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) if (charged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) vm_unacct_memory(charged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) * We implement the search by looking for an rbtree node that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) * immediately follows a suitable gap. That is,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) * - gap_end = vma->vm_start >= info->low_limit + length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) * - gap_end - gap_start >= length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) unsigned long length, low_limit, high_limit, gap_start, gap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) /* Adjust search length to account for worst case alignment overhead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) length = info->length + info->align_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (length < info->length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) /* Adjust search limits by the desired length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) if (info->high_limit < length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) high_limit = info->high_limit - length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) if (info->low_limit > high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) low_limit = info->low_limit + length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) /* Check if rbtree root looks promising */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (RB_EMPTY_ROOT(&mm->mm_rb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) goto check_highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (vma->rb_subtree_gap < length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) goto check_highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) /* Visit left subtree if it looks promising */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) gap_end = vm_start_gap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) if (gap_end >= low_limit && vma->vm_rb.rb_left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) struct vm_area_struct *left =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) rb_entry(vma->vm_rb.rb_left,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) if (left->rb_subtree_gap >= length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) vma = left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) check_current:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) /* Check if current node has a suitable gap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (gap_start > high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) if (gap_end >= low_limit &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) gap_end > gap_start && gap_end - gap_start >= length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) /* Visit right subtree if it looks promising */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) if (vma->vm_rb.rb_right) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) struct vm_area_struct *right =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) rb_entry(vma->vm_rb.rb_right,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) if (right->rb_subtree_gap >= length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) vma = right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) /* Go back up the rbtree to find next candidate node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) struct rb_node *prev = &vma->vm_rb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (!rb_parent(prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) goto check_highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) vma = rb_entry(rb_parent(prev),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) if (prev == vma->vm_rb.rb_left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) gap_start = vm_end_gap(vma->vm_prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) gap_end = vm_start_gap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) goto check_current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) check_highest:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) /* Check highest gap, which does not precede any rbtree node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) gap_start = mm->highest_vm_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) gap_end = ULONG_MAX; /* Only for VM_BUG_ON below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) if (gap_start > high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) /* We found a suitable gap. Clip it with the original low_limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) if (gap_start < info->low_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) gap_start = info->low_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) /* Adjust gap address to the desired alignment */
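/*
 * This rounds gap_start up so that it is congruent to align_offset modulo
 * (align_mask + 1). For example (illustrative values), with
 * align_mask == 0xffff and align_offset == 0, a gap_start of 0x12345000
 * becomes 0x12350000.
 */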
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) gap_start += (info->align_offset - gap_start) & info->align_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) VM_BUG_ON(gap_start + info->length > info->high_limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) VM_BUG_ON(gap_start + info->length > gap_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) return gap_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093)
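/*
 * Mirror image of unmapped_area(): walk the same augmented rbtree, but
 * return the highest suitable gap below info->high_limit instead of the
 * lowest one above info->low_limit.
 */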
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) unsigned long length, low_limit, high_limit, gap_start, gap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) unsigned long addr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) /* Adjust search length to account for worst case alignment overhead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) length = info->length + info->align_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) if (length < info->length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) trace_android_vh_get_from_fragment_pool(mm, info, &addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) if (addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) * Adjust search limits by the desired length.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * See implementation comment at top of unmapped_area().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) gap_end = info->high_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) if (gap_end < length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) high_limit = gap_end - length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) if (info->low_limit > high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) low_limit = info->low_limit + length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) /* Check highest gap, which does not precede any rbtree node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) gap_start = mm->highest_vm_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) if (gap_start <= high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) goto found_highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) /* Check if rbtree root looks promising */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) if (RB_EMPTY_ROOT(&mm->mm_rb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) if (vma->rb_subtree_gap < length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) /* Visit right subtree if it looks promising */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) if (gap_start <= high_limit && vma->vm_rb.rb_right) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) struct vm_area_struct *right =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) rb_entry(vma->vm_rb.rb_right,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) if (right->rb_subtree_gap >= length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) vma = right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) check_current:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) /* Check if current node has a suitable gap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) gap_end = vm_start_gap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) if (gap_end < low_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) if (gap_start <= high_limit &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) gap_end > gap_start && gap_end - gap_start >= length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) /* Visit left subtree if it looks promising */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) if (vma->vm_rb.rb_left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) struct vm_area_struct *left =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) rb_entry(vma->vm_rb.rb_left,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) if (left->rb_subtree_gap >= length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) vma = left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) /* Go back up the rbtree to find next candidate node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) struct rb_node *prev = &vma->vm_rb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) if (!rb_parent(prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) vma = rb_entry(rb_parent(prev),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) if (prev == vma->vm_rb.rb_right) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) gap_start = vma->vm_prev ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) vm_end_gap(vma->vm_prev) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) goto check_current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) /* We found a suitable gap. Clip it with the original high_limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) if (gap_end > info->high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) gap_end = info->high_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) found_highest:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) /* Compute highest gap address at the desired alignment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) gap_end -= info->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) gap_end -= (gap_end - info->align_offset) & info->align_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) VM_BUG_ON(gap_end < info->low_limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) VM_BUG_ON(gap_end < gap_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) return gap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) * Search for an unmapped address range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) * We are looking for a range that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) * - does not intersect with any VMA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) * - is contained within the [low_limit, high_limit) interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) * - is at least the desired size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) * - satisfies (begin_addr & align_mask) == (align_offset & align_mask).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) addr = unmapped_area_topdown(info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) addr = unmapped_area(info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) trace_vm_unmapped_area(addr, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) EXPORT_SYMBOL_GPL(vm_unmapped_area);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
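/*
 * Architectures may override these to clamp the mmap search window; the
 * defaults use the full TASK_SIZE and the precomputed mmap base.
 */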
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) #ifndef arch_get_mmap_end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) #define arch_get_mmap_end(addr) (TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) #ifndef arch_get_mmap_base
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) #define arch_get_mmap_base(addr, base) (base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) /* Get an address range which is currently unmapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) * For shmat() with addr=0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) * Ugly calling convention alert:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) * Return value with the low bits set means error value,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) * i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) * if (ret & ~PAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) * error = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) * This function "knows" that -ENOMEM has the bits set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) #ifndef HAVE_ARCH_UNMAPPED_AREA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) arch_get_unmapped_area(struct file *filp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) unsigned long len, unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) struct vm_area_struct *vma, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) struct vm_unmapped_area_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) const unsigned long mmap_end = arch_get_mmap_end(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) if (len > mmap_end - mmap_min_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) if (flags & MAP_FIXED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) if (addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) addr = PAGE_ALIGN(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) vma = find_vma_prev(mm, addr, &prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) if (mmap_end - len >= addr && addr >= mmap_min_addr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) (!vma || addr + len <= vm_start_gap(vma)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) (!prev || addr >= vm_end_gap(prev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) info.length = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) info.low_limit = mm->mmap_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) info.high_limit = mmap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) info.align_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) info.align_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) return vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) * This mmap-allocator allocates new areas top-down from below the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) * stack's low limit (the base):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) #ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) unsigned long len, unsigned long pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) struct vm_area_struct *vma, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) struct vm_unmapped_area_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) const unsigned long mmap_end = arch_get_mmap_end(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) /* requested length too big for entire address space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) if (len > mmap_end - mmap_min_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) if (flags & MAP_FIXED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) /* requesting a specific address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) if (addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) addr = PAGE_ALIGN(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) vma = find_vma_prev(mm, addr, &prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) if (mmap_end - len >= addr && addr >= mmap_min_addr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) (!vma || addr + len <= vm_start_gap(vma)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) (!prev || addr >= vm_end_gap(prev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) info.flags = VM_UNMAPPED_AREA_TOPDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) info.length = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) info.low_limit = max(PAGE_SIZE, mmap_min_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) info.align_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) info.align_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) trace_android_vh_exclude_reserved_zone(mm, &info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) addr = vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) * A failed mmap() very likely causes application failure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) * so fall back to the bottom-up function here. This scenario
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) * can happen with large stack limits and large mmap()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) * allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) if (offset_in_page(addr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) VM_BUG_ON(addr != -ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) info.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) info.low_limit = TASK_UNMAPPED_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) info.high_limit = mmap_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) addr = vm_unmapped_area(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) trace_android_vh_include_reserved_zone(mm, &info, &addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335)
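/*
 * Top-level helper used by do_mmap(): pick the mm's, the file's or shmem's
 * get_unmapped_area callback, run it, then sanity-check that the result
 * fits in the address space, is page aligned and passes the LSM check.
 */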
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) unsigned long (*get_area)(struct file *, unsigned long,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) unsigned long, unsigned long, unsigned long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) unsigned long error = arch_mmap_check(addr, len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) /* Careful about overflows. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) if (len > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) get_area = current->mm->get_unmapped_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) if (file->f_op->get_unmapped_area)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) get_area = file->f_op->get_unmapped_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) } else if (flags & MAP_SHARED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) * mmap_region() will call shmem_zero_setup() to create a file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) * so use shmem's get_unmapped_area in case it can be huge.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) * do_mmap() will clear pgoff, so match alignment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) pgoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) get_area = shmem_get_unmapped_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) addr = get_area(file, addr, len, pgoff, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) if (IS_ERR_VALUE(addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) return addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) if (addr > TASK_SIZE - len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) if (offset_in_page(addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) error = security_mmap_addr(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) return error ? error : addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) EXPORT_SYMBOL(get_unmapped_area);
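
/*
 * Illustrative sketch (not part of this file): callers that only need a free,
 * page-aligned range follow the same pattern as do_brk_flags() further down:
 *
 *	addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
 *	if (IS_ERR_VALUE(addr))
 *		return addr;
 *
 * On success the returned address is page-aligned, lies below TASK_SIZE and
 * has already passed security_mmap_addr().
 */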
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) static struct vm_area_struct *__find_vma(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) struct rb_node *rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) rb_node = mm->mm_rb.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) while (rb_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) struct vm_area_struct *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) if (tmp->vm_end > addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) vma = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (tmp->vm_start <= addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) rb_node = rb_node->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) rb_node = rb_node->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) /* Check the cache first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) vma = vmacache_find(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (likely(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) vma = __find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) if (vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) vmacache_update(addr, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) EXPORT_SYMBOL(find_vma);
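
/*
 * Note that find_vma() returns the first VMA with vm_end > addr; that VMA may
 * start above @addr, i.e. the address can fall in a hole below it. Callers
 * that need the mapping to actually contain the address must also check
 * vm_start, as find_extend_vma() below does (illustrative sketch):
 *
 *	vma = find_vma(mm, addr);
 *	if (!vma || addr < vma->vm_start)
 *		return NULL;
 */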
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
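/*
 * Speculative page fault lookup: like __find_vma(), but done under mm_rb_lock
 * and with a reference taken on the returned VMA (vm_ref_count) so that it
 * can be used after the lock is dropped without holding the mmap_lock. The
 * caller is responsible for dropping that reference when it is done.
 */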
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) read_lock(&mm->mm_rb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) vma = __find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) if (vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) atomic_inc(&vma->vm_ref_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) read_unlock(&mm->mm_rb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) struct vm_area_struct *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) find_vma_prev(struct mm_struct *mm, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) struct vm_area_struct **pprev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) *pprev = vma->vm_prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) struct rb_node *rb_node = rb_last(&mm->mm_rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) * Verify that the stack growth is acceptable and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) * update accounting. This is shared with both the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) * grow-up and grow-down cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) static int acct_stack_growth(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) unsigned long size, unsigned long grow)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) unsigned long new_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) /* address space limit tests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) if (!may_expand_vm(mm, vma->vm_flags, grow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) /* Stack limit test */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) if (size > rlimit(RLIMIT_STACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) /* mlock limit tests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) unsigned long locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) unsigned long limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) locked = mm->locked_vm + grow;
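/* RLIMIT_MEMLOCK is in bytes; locked_vm and grow are counted in pages. */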
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) limit = rlimit(RLIMIT_MEMLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) limit >>= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) if (locked > limit && !capable(CAP_IPC_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) /* Check to ensure the stack will not grow into a hugetlb-only region */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) vma->vm_end - size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) if (is_hugepage_only_range(vma->vm_mm, new_start, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) * Overcommit.. This must be the final test, as it will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) * update security statistics.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) if (security_vm_enough_memory_mm(mm, grow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) * vma is the last one with address > vma->vm_end. Have to extend vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) int expand_upwards(struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) struct vm_area_struct *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) unsigned long gap_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) if (!(vma->vm_flags & VM_GROWSUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) /* Guard against exceeding limits of the address space. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) address &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) if (address >= (TASK_SIZE & PAGE_MASK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) address += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) /* Enforce stack_guard_gap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) gap_addr = address + stack_guard_gap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) /* Guard against overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) if (gap_addr < address || gap_addr > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) gap_addr = TASK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) next = vma->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) if (!(next->vm_flags & VM_GROWSUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) /* Check that both stack segments have the same anon_vma? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) /* We must make sure the anon_vma is allocated. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) if (unlikely(anon_vma_prepare(vma)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) * vma->vm_start/vm_end cannot change under us because the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) * is required to hold the mmap_lock in read mode. We need the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) * anon_vma lock to serialize against concurrent expand_stacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) anon_vma_lock_write(vma->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) /* Somebody else might have raced and expanded it already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) if (address > vma->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) unsigned long size, grow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) size = address - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) grow = (address - vma->vm_end) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) error = acct_stack_growth(vma, size, grow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) if (!error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) * vma_gap_update() doesn't support concurrent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) * updates, but we only hold the mmap_lock in read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) * (shared) mode here, so we need to protect against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) * concurrent vma expansions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) * anon_vma_lock_write() doesn't help here, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) * we don't guarantee that all growable vmas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) * in a mm share the same root anon vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) * So, we reuse mm->page_table_lock to guard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) * against concurrent vma expansions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) spin_lock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) if (vma->vm_flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) mm->locked_vm += grow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) vm_stat_account(mm, vma->vm_flags, grow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) anon_vma_interval_tree_pre_update_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) vma->vm_end = address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) anon_vma_interval_tree_post_update_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) if (vma->vm_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) vma_gap_update(vma->vm_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) mm->highest_vm_end = vm_end_gap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) spin_unlock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) perf_event_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) anon_vma_unlock_write(vma->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) khugepaged_enter_vma_merge(vma, vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) validate_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) * vma is the first one with address < vma->vm_start. Have to extend vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) int expand_downwards(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) struct vm_area_struct *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) address &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) if (address < mmap_min_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) /* Enforce stack_guard_gap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) prev = vma->vm_prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) /* Check that both stack segments have the same anon_vma? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) vma_is_accessible(prev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) if (address - prev->vm_end < stack_guard_gap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) /* We must make sure the anon_vma is allocated. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) if (unlikely(anon_vma_prepare(vma)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) * vma->vm_start/vm_end cannot change under us because the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) * is required to hold the mmap_lock in read mode. We need the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) * anon_vma lock to serialize against concurrent expand_stacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) anon_vma_lock_write(vma->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) /* Somebody else might have raced and expanded it already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) if (address < vma->vm_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) unsigned long size, grow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) size = vma->vm_end - address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) grow = (vma->vm_start - address) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) if (grow <= vma->vm_pgoff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) error = acct_stack_growth(vma, size, grow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) if (!error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) * vma_gap_update() doesn't support concurrent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) * updates, but we only hold the mmap_lock in read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) * (shared) mode here, so we need to protect against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) * concurrent vma expansions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) * anon_vma_lock_write() doesn't help here, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) * we don't guarantee that all growable vmas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) * in a mm share the same root anon vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) * So, we reuse mm->page_table_lock to guard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) * against concurrent vma expansions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) spin_lock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) if (vma->vm_flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) mm->locked_vm += grow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) vm_stat_account(mm, vma->vm_flags, grow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) anon_vma_interval_tree_pre_update_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) WRITE_ONCE(vma->vm_start, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) WRITE_ONCE(vma->vm_pgoff, vma->vm_pgoff - grow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) anon_vma_interval_tree_post_update_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) vma_gap_update(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) spin_unlock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) perf_event_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) anon_vma_unlock_write(vma->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) khugepaged_enter_vma_merge(vma, vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) validate_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) /* enforced gap between the expanding stack and other mappings. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) static int __init cmdline_parse_stack_guard_gap(char *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) unsigned long val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) char *endptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) val = simple_strtoul(p, &endptr, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) if (!*endptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) stack_guard_gap = val << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
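
/*
 * The boot parameter above is interpreted in pages: e.g. "stack_guard_gap=512"
 * widens the gap to 512 pages (2 MiB with 4 KiB pages); the built-in default
 * is 256 pages.
 */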
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) #ifdef CONFIG_STACK_GROWSUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) int expand_stack(struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) return expand_upwards(vma, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) struct vm_area_struct *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) find_extend_vma(struct mm_struct *mm, unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) struct vm_area_struct *vma, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) addr &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) vma = find_vma_prev(mm, addr, &prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) if (vma && (vma->vm_start <= addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) /* don't alter vm_end if the coredump is running */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) if (!prev || expand_stack(prev, addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) if (prev->vm_flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) populate_vma_page_range(prev, addr, prev->vm_end, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) return prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) int expand_stack(struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) return expand_downwards(vma, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) struct vm_area_struct *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) find_extend_vma(struct mm_struct *mm, unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) unsigned long start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) addr &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) if (!vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) if (vma->vm_start <= addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) if (!(vma->vm_flags & VM_GROWSDOWN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) start = vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) if (expand_stack(vma, addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) if (vma->vm_flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) populate_vma_page_range(vma, addr, start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) EXPORT_SYMBOL_GPL(find_extend_vma);
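
/*
 * find_extend_vma() behaves like find_vma() except that, when the address
 * falls just outside a growable stack VMA, it extends the stack to cover it
 * and populates the new range if the VMA is mlocked. Fault-style paths such
 * as get_user_pages() use it instead of calling find_vma() directly.
 */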
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) * Ok - we have the memory areas we should free on the vma list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) * so release them, and do the vma updates.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) * Called with the mm semaphore held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) unsigned long nr_accounted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) /* Update high watermark before we lower total_vm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) update_hiwater_vm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) long nrpages = vma_pages(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) if (vma->vm_flags & VM_ACCOUNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) nr_accounted += nrpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) vm_stat_account(mm, vma->vm_flags, -nrpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) vma = remove_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) } while (vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) vm_unacct_memory(nr_accounted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) validate_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) * Get rid of page table information in the indicated region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) * Called with the mm semaphore held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) static void unmap_region(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) struct vm_area_struct *vma, struct vm_area_struct *prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) unsigned long start, unsigned long end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) struct vm_area_struct *next = vma_next(mm, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) struct mmu_gather tlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) tlb_gather_mmu(&tlb, mm, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) update_hiwater_rss(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) unmap_vmas(&tlb, vma, start, end);
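/*
 * Page tables covering the hole can be freed up to the neighbouring VMAs, or
 * to the limits of the user address range if there is no neighbour on that
 * side.
 */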
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) next ? next->vm_start : USER_PGTABLES_CEILING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) tlb_finish_mmu(&tlb, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) * Create a list of vmas touched by the unmap, removing them from the mm's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) * vma list as we go.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) struct vm_area_struct *prev, unsigned long end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) struct vm_area_struct **insertion_point;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) struct vm_area_struct *tail_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) insertion_point = (prev ? &prev->vm_next : &mm->mmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) vma->vm_prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) vma_rb_erase(vma, mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) mm->map_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) tail_vma = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) vma = vma->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) } while (vma && vma->vm_start < end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) *insertion_point = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) if (vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) vma->vm_prev = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) vma_gap_update(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) tail_vma->vm_next = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) /* Kill the cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) vmacache_invalidate(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) * VM_GROWSUP VMA. Such VMAs can change their size under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) * down_read(mmap_lock) and collide with the VMA we are about to unmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) if (vma && (vma->vm_flags & VM_GROWSDOWN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) if (prev && (prev->vm_flags & VM_GROWSUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) * __split_vma() bypasses the sysctl_max_map_count check. We use it where the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) * limit has already been checked or where failing doesn't make sense.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) unsigned long addr, int new_below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) struct vm_area_struct *new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) if (vma->vm_ops && vma->vm_ops->split) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) err = vma->vm_ops->split(vma, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) new = vm_area_dup(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) if (!new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) if (new_below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) new->vm_end = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) new->vm_start = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) err = vma_dup_policy(vma, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) goto out_free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) err = anon_vma_clone(new, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) goto out_free_mpol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) if (new->vm_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) get_file(new->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) if (new->vm_ops && new->vm_ops->open)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) new->vm_ops->open(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) if (new_below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) ((addr - new->vm_start) >> PAGE_SHIFT), new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) /* Success. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) /* Clean everything up if vma_adjust failed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) if (new->vm_ops && new->vm_ops->close)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) new->vm_ops->close(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) if (new->vm_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) fput(new->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) unlink_anon_vmas(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) out_free_mpol:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) mpol_put(vma_policy(new));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) out_free_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) vm_area_free(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) * Split a vma into two pieces at address 'addr'. A new vma is allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) * either for the first part (new_below != 0) or for the tail (new_below == 0).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) unsigned long addr, int new_below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) if (mm->map_count >= sysctl_max_map_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) return __split_vma(mm, vma, addr, new_below);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) }
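
/*
 * Illustrative only: __do_munmap() below uses both flavours when an unmap
 * range cuts through existing mappings,
 *
 *	__split_vma(mm, vma, start, 0);
 *	__split_vma(mm, last, end, 1);
 *
 * the first keeps the low part in 'vma', the second keeps the high part in
 * 'last', so that only whole VMAs remain inside [start, end) and can be
 * detached.
 */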
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) /* Munmap is split into 2 main parts -- this part, which works out what
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) * needs doing, and the area teardown (unmap_region() and remove_vma_list()),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) * which does the work. This now handles partial unmappings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) * Jeremy Fitzhardinge <jeremy@goop.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) */
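/*
 * Returns 0 on success and a negative errno on failure. If @downgrade is true
 * and the detached VMAs allow it, the mmap_lock is downgraded to read mode
 * and 1 is returned; the caller must then drop it with mmap_read_unlock()
 * (see __vm_munmap() below).
 */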
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) struct list_head *uf, bool downgrade)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) unsigned long end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) struct vm_area_struct *vma, *prev, *last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) if (offset_in_page(start) || start > TASK_SIZE || len > TASK_SIZE - start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) len = PAGE_ALIGN(len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) end = start + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) if (len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) * arch_unmap() might do unmaps itself. It must be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) * and finish any rbtree manipulation before this code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) * runs and also starts to manipulate the rbtree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) arch_unmap(mm, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) /* Find the first overlapping VMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) vma = find_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) if (!vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) prev = vma->vm_prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) /* we have start < vma->vm_end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) /* if it doesn't overlap, we have nothing.. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) if (vma->vm_start >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) * If we need to split any vma, do it now to save pain later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) * unmapped vm_area_struct will remain in use: so lower split_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) * places tmp vma above, and higher split_vma places tmp vma below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) if (start > vma->vm_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) * Make sure that map_count on return from munmap() will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) * not exceed its limit; but let map_count go just above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) * its limit temporarily, to help free resources as expected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) error = __split_vma(mm, vma, start, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) prev = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) /* Does it split the last one? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) last = find_vma(mm, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) if (last && end > last->vm_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) int error = __split_vma(mm, last, end, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) vma = vma_next(mm, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) if (unlikely(uf)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) * If userfaultfd_unmap_prep returns an error the vmas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) * will remain split, but userland will get a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) * highly unexpected error anyway. This is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) * different from the case where the first of the two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) * __split_vma calls fails, but we don't undo the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) * split even though we could. This failure is unlikely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) * enough that it's not worth optimizing for.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) int error = userfaultfd_unmap_prep(vma, start, end, uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) * unlock any mlock()ed ranges before detaching vmas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) if (mm->locked_vm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) struct vm_area_struct *tmp = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) while (tmp && tmp->vm_start < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) if (tmp->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) mm->locked_vm -= vma_pages(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) munlock_vma_pages_all(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) tmp = tmp->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) /* Detach vmas from rbtree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) downgrade = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) if (downgrade)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) mmap_write_downgrade(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) unmap_region(mm, vma, prev, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) /* Fix up all other VM information */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) remove_vma_list(mm, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) return downgrade ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) struct list_head *uf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) return __do_munmap(mm, start, len, uf, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) LIST_HEAD(uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) if (mmap_write_lock_killable(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) ret = __do_munmap(mm, start, len, &uf, downgrade);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) * Returning 1 indicates mmap_lock is downgraded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) * But 1 is not a legal return value of vm_munmap() and munmap(), so reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) * it to 0 before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) if (ret == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) mmap_write_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) userfaultfd_unmap_complete(mm, &uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) int vm_munmap(unsigned long start, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) return __vm_munmap(start, len, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) EXPORT_SYMBOL(vm_munmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) addr = untagged_addr(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) profile_munmap(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) return __vm_munmap(addr, len, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) * Emulation of deprecated remap_file_pages() syscall.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) unsigned long populate = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) unsigned long ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) current->comm, current->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) if (prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) start = start & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) size = size & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) if (start + size <= start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) /* Does pgoff wrap? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) if (pgoff + (size >> PAGE_SHIFT) < pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) if (mmap_write_lock_killable(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) vma = find_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) if (!vma || !(vma->vm_flags & VM_SHARED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) if (start < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) if (start + size > vma->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) struct vm_area_struct *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) for (next = vma->vm_next; next; next = next->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) /* hole between vmas? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) if (next->vm_start != next->vm_prev->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) if (next->vm_file != vma->vm_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) if (next->vm_flags != vma->vm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) if (start + size <= next->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) if (!next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) flags &= MAP_NONBLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) struct vm_area_struct *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) flags |= MAP_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) /* drop PG_mlocked flag for over-mapped range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) for (tmp = vma; tmp->vm_start >= start + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) tmp = tmp->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) * Split pmd and munlock page on the border
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) * of the range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) vma_adjust_trans_huge(tmp, start, start + size, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) munlock_vma_pages_range(tmp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) max(tmp->vm_start, start),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) min(tmp->vm_end, start + size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150)
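/*
 * The actual remapping is done by re-mmap()ing the same file with MAP_FIXED
 * at the new pgoff over [start, start + size). get_file() keeps the file
 * alive across do_mmap(), which may drop the last reference the VMA holds
 * on it.
 */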
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) file = get_file(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) ret = do_mmap(vma->vm_file, start, size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) prot, flags, pgoff, &populate, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) mmap_write_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) if (populate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) mm_populate(ret, populate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) if (!IS_ERR_VALUE(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) * This is really a simplified "do_mmap": it only handles
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) * anonymous maps. Eventually we may be able to do some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) * brk-specific accounting here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) struct vm_area_struct *vma, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) struct rb_node **rb_link, *rb_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) pgoff_t pgoff = addr >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) unsigned long mapped_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) /* Until we need other flags, refuse anything except VM_EXEC. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) if ((flags & (~VM_EXEC)) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) if (IS_ERR_VALUE(mapped_addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) return mapped_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) error = mlock_future_check(mm, mm->def_flags, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) /* Clear old maps, set up prev, rb_link, rb_parent, and uf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) /* Check against address space limits *after* clearing old maps... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) if (mm->map_count > sysctl_max_map_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) /* Can we just expand an old private anonymous mapping? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) vma = vma_merge(mm, prev, addr, addr + len, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) if (vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) * create a vma struct for an anonymous mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) vma = vm_area_alloc(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) if (!vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) vm_unacct_memory(len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) vma_set_anonymous(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) vma->vm_start = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) vma->vm_end = addr + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) vma->vm_pgoff = pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) vma->vm_flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) vma->vm_page_prot = vm_get_page_prot(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) vma_link(mm, vma, prev, rb_link, rb_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) perf_event_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) mm->total_vm += len >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) mm->data_vm += len >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) if (flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) mm->locked_vm += (len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) vma->vm_flags |= VM_SOFTDIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) unsigned long len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) bool populate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) LIST_HEAD(uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) len = PAGE_ALIGN(request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) if (len < request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) if (mmap_write_lock_killable(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) ret = do_brk_flags(addr, len, flags, &uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) populate = ((mm->def_flags & VM_LOCKED) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) mmap_write_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) userfaultfd_unmap_complete(mm, &uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) if (populate && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) mm_populate(addr, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) EXPORT_SYMBOL(vm_brk_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) int vm_brk(unsigned long addr, unsigned long len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) return vm_brk_flags(addr, len, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) EXPORT_SYMBOL(vm_brk);
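
/*
 * Usage sketch (illustrative; names and values are hypothetical): an
 * in-kernel loader that needs a zero-filled anonymous region can do
 *
 *	error = vm_brk_flags(addr, size, want_exec ? VM_EXEC : 0);
 *
 * A zero-length request returns 0 without mapping anything, and a size
 * whose PAGE_ALIGN() overflows returns -ENOMEM, as implemented above.
 */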
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) /* Release all mmaps. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) void exit_mmap(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) struct mmu_gather tlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) unsigned long nr_accounted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) /* mm's last user has gone, and it's about to be pulled down */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) mmu_notifier_release(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) if (unlikely(mm_is_oom_victim(mm))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) * Manually reap the mm to free as much memory as possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) * this mm from further consideration. Taking mm->mmap_lock for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) * write after setting MMF_OOM_SKIP will guarantee that the oom
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) * reaper will not run on this mm again after mmap_lock is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) * dropped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) * Nothing can be holding mm->mmap_lock here and the above call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) * __oom_reap_task_mm() will not block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) * This needs to be done before calling munlock_vma_pages_all(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) * which clears VM_LOCKED, otherwise the oom reaper cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) * reliably test it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) (void)__oom_reap_task_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) set_bit(MMF_OOM_SKIP, &mm->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) mmap_write_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) if (mm->locked_vm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) vma = mm->mmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) while (vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) if (vma->vm_flags & VM_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) munlock_vma_pages_all(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) vma = vma->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) arch_exit_mmap(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) vma = mm->mmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) if (!vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) /* Can happen if dup_mmap() received an OOM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) mmap_write_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) flush_cache_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) tlb_gather_mmu(&tlb, mm, 0, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) /* update_hiwater_rss(mm) here? but nobody should be looking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) /* Use -1 here to ensure all VMAs in the mm are unmapped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) unmap_vmas(&tlb, vma, 0, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) tlb_finish_mmu(&tlb, 0, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) /* Walk the list again, actually closing and freeing it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) while (vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) if (vma->vm_flags & VM_ACCOUNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) nr_accounted += vma_pages(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) vma = remove_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) mmap_write_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) vm_unacct_memory(nr_accounted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) /* Insert vm structure into process list sorted by address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) * and into the inode's i_mmap tree. If vm_file is non-NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) * then i_mmap_rwsem is taken here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) struct vm_area_struct *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) struct rb_node **rb_link, *rb_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) if (find_vma_links(mm, vma->vm_start, vma->vm_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) &prev, &rb_link, &rb_parent))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) if ((vma->vm_flags & VM_ACCOUNT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) security_vm_enough_memory_mm(mm, vma_pages(vma)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) * The vm_pgoff of a purely anonymous vma should be irrelevant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) * until its first write fault, when the page's anon_vma and index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) * are set. But now set the vm_pgoff it will almost certainly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) * end up with (unless mremap moves it elsewhere before that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) * first write fault), so /proc/pid/maps tells a consistent story.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) * By setting it to reflect the virtual start address of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) * vma, merges and splits can happen in a seamless way, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) * using the existing file pgoff checks and manipulations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) * Similarly in do_mmap and in do_brk_flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) if (vma_is_anonymous(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) BUG_ON(vma->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) vma_link(mm, vma, prev, rb_link, rb_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) }
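
/*
 * Worked example of the vm_pgoff convention above (illustrative): with 4K
 * pages, an anonymous vma starting at 0x7f0000001000 gets
 * vm_pgoff = 0x7f0000001000 >> PAGE_SHIFT = 0x7f0000001, so splitting it
 * N pages in yields a second vma whose vm_pgoff is simply vm_pgoff + N,
 * exactly as for a file-backed mapping.
 */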
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) * Copy the vma structure to a new location in the same mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) * prior to moving page table entries, to effect an mremap move.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) unsigned long addr, unsigned long len, pgoff_t pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) bool *need_rmap_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) struct vm_area_struct *vma = *vmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) unsigned long vma_start = vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) struct vm_area_struct *new_vma, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) struct rb_node **rb_link, *rb_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) bool faulted_in_anon_vma = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) * If anonymous vma has not yet been faulted, update new pgoff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) * to match new location, to increase its chance of merging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) pgoff = addr >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) faulted_in_anon_vma = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) return NULL; /* should never get here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) /* There are 3 cases to manage here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) * AAAA AAAA AAAA AAAA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) * PPPP.... PPPP......NNNN PPPP....NNNN PP........NN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) * PPPPPPPP(A) PPPP..NNNNNNNN(B) PPPPPPPPPPPP(1) NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) * PPPPPPPPNNNN(2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) * PPPPNNNNNNNN(3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) * new_vma == prev in case A,1,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) * new_vma == next in case B,3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) new_vma = __vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) vma->anon_vma, vma->vm_file, pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) vma_policy(vma), vma->vm_userfaultfd_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) vma_get_anon_name(vma), true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) if (new_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) * Source vma may have been merged into new_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) if (unlikely(vma_start >= new_vma->vm_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) vma_start < new_vma->vm_end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) * The only way we can get a vma_merge with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) * self during an mremap is if the vma hasn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) * been faulted in yet and we were allowed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) * reset the dst vma->vm_pgoff to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) * destination address of the mremap to allow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) * the merge to happen. mremap must change the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) * vm_pgoff linearity between src and dst vmas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) * (in turn preventing a vma_merge) to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) * safe. It is only safe to keep the vm_pgoff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) * linear if there are no pages mapped yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) *vmap = vma = new_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) new_vma = vm_area_dup(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) if (!new_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) new_vma->vm_start = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) new_vma->vm_end = addr + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) new_vma->vm_pgoff = pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) if (vma_dup_policy(vma, new_vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) goto out_free_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) if (anon_vma_clone(new_vma, vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) goto out_free_mempol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) if (new_vma->vm_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) get_file(new_vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) if (new_vma->vm_ops && new_vma->vm_ops->open)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) new_vma->vm_ops->open(new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) * As the VMA is being linked right now, it may be hit by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) * speculative page fault handler. But we don't want it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) * start mapping pages in this area until the caller has had a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) * chance to move the ptes from the VMA being moved. To prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) * that, we protect it right now and let the caller unprotect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) * it once the move is done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) vm_write_begin(new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) vma_link(mm, new_vma, prev, rb_link, rb_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) *need_rmap_locks = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) return new_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) out_free_mempol:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) mpol_put(vma_policy(new_vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) out_free_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) vm_area_free(new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) * Return true if the calling process may expand its vm space by the passed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) * number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) if (is_data_mapping(flags) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) /* Workaround for Valgrind */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) if (rlimit(RLIMIT_DATA) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) current->comm, current->pid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) (mm->data_vm + npages) << PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) rlimit(RLIMIT_DATA),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) if (!ignore_rlimit_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) }
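
/*
 * Worked example (illustrative): with 4K pages, RLIMIT_DATA = 64MB gives
 * rlimit(RLIMIT_DATA) >> PAGE_SHIFT = 16384 pages. A data mapping that
 * would push mm->data_vm past that fails with the warning above, unless
 * the soft limit is 0 while the hard limit still has room (the Valgrind
 * case) or ignore_rlimit_data is set.
 */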
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) mm->total_vm += npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) if (is_exec_mapping(flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) mm->exec_vm += npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) else if (is_stack_mapping(flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) mm->stack_vm += npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) else if (is_data_mapping(flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) mm->data_vm += npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) * Having a close hook prevents vma merging regardless of flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) static void special_mapping_close(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) static const char *special_mapping_name(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) return ((struct vm_special_mapping *)vma->vm_private_data)->name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) static int special_mapping_mremap(struct vm_area_struct *new_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) struct vm_special_mapping *sm = new_vma->vm_private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) if (sm->mremap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) return sm->mremap(sm, new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) static const struct vm_operations_struct special_mapping_vmops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) .close = special_mapping_close,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) .fault = special_mapping_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) .mremap = special_mapping_mremap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) .name = special_mapping_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) /* vDSO code relies on VVAR not being accessible remotely */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) .access = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) static const struct vm_operations_struct legacy_special_mapping_vmops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) .close = special_mapping_close,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) .fault = special_mapping_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) pgoff_t pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) if (vma->vm_ops == &legacy_special_mapping_vmops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) pages = vma->vm_private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) struct vm_special_mapping *sm = vma->vm_private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) if (sm->fault)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) return sm->fault(sm, vmf->vma, vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) pages = sm->pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) pgoff--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) if (*pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) struct page *page = *pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) vmf->page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) return VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) }
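
/*
 * Example of the walk above (illustrative): for a legacy mapping whose
 * private data is { &pageA, &pageB, NULL }, a fault at vmf->pgoff == 1
 * returns pageB with an extra reference, while a fault at pgoff >= 2 runs
 * off the NULL terminator and raises VM_FAULT_SIGBUS.
 */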
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) static struct vm_area_struct *__install_special_mapping(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) unsigned long vm_flags, void *priv,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) const struct vm_operations_struct *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) vma = vm_area_alloc(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) if (unlikely(vma == NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) vma->vm_start = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) vma->vm_end = addr + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) vma->vm_ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) vma->vm_private_data = priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) ret = insert_vm_struct(mm, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) perf_event_mmap(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) vm_area_free(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) bool vma_is_special_mapping(const struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) const struct vm_special_mapping *sm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) return vma->vm_private_data == sm &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) (vma->vm_ops == &special_mapping_vmops ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) vma->vm_ops == &legacy_special_mapping_vmops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) * Called with mm->mmap_lock held for writing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) * Insert a new vma covering the given region, with the given flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) * Its pages are supplied by the given array of struct page *.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) * The region past the last page supplied will always produce SIGBUS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) * The array pointer and the pages it points to are assumed to stay alive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) * for as long as this mapping might exist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) struct vm_area_struct *_install_special_mapping(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) unsigned long vm_flags, const struct vm_special_mapping *spec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) &special_mapping_vmops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) }
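
/*
 * Usage sketch (illustrative; names are hypothetical): an architecture
 * installing a vDSO-style page with mmap_lock held for writing keeps the
 * backing pages and the descriptor alive for the lifetime of the mm:
 *
 *	static struct page *vxyz_pages[2];	// vxyz_pages[1] stays NULL
 *	static const struct vm_special_mapping vxyz_mapping = {
 *		.name  = "[vxyz]",
 *		.pages = vxyz_pages,
 *	};
 *
 *	vxyz_pages[0] = virt_to_page(vxyz_data);	// filled in at init time
 *	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
 *				       VM_READ|VM_MAYREAD, &vxyz_mapping);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */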
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) int install_special_mapping(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) unsigned long vm_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) struct vm_area_struct *vma = __install_special_mapping(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) mm, addr, len, vm_flags, (void *)pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) &legacy_special_mapping_vmops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) return PTR_ERR_OR_ZERO(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) static DEFINE_MUTEX(mm_all_locks_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) * The LSB of head.next can't change from under us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) * because we hold the mm_all_locks_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) * We can safely modify head.next after taking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) * anon_vma->root->rwsem. If some other vma in this mm shares
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) * the same anon_vma we won't take it again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) * No need of atomic instructions here, head.next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) * can't change from under us thanks to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) * anon_vma->root->rwsem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) if (__test_and_set_bit(0, (unsigned long *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) &anon_vma->root->rb_root.rb_root.rb_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) * AS_MM_ALL_LOCKS can't change from under us because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) * we hold the mm_all_locks_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) * Operations on ->flags have to be atomic because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) * even if AS_MM_ALL_LOCKS is stable thanks to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) * mm_all_locks_mutex, there may be other cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) * changing other bitflags in parallel to us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) * This operation locks against the VM for all pte/vma/mm related
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) * operations that could ever happen on a certain mm. This includes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) * vmtruncate, try_to_unmap, and all page faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) * The caller must take the mmap_lock in write mode before calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) * mm_take_all_locks(). The caller isn't allowed to release the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) * mmap_lock until mm_drop_all_locks() returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) * mmap_lock in write mode is required in order to block all operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) * that could modify pagetables and free pages without the need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) * alter the vma layout. It's also needed in write mode to prevent new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) * anon_vmas from being associated with existing vmas.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) * A single task can't take more than one mm_take_all_locks() in a row
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) * or it would deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) * mapping->flags avoid taking the same lock twice if more than one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) * vma in this mm is backed by the same anon_vma or address_space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) * We take locks in the following order, according to the comment at the beginning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) * of mm/rmap.c:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) * hugetlb mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) * - all i_mmap_rwsem locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) * - all anon_vma->rwsem locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) * We can take all locks within these types in any order because the VM code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) * doesn't nest them and we are protected from parallel mm_take_all_locks() by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) * mm_all_locks_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) * that may have to take thousands of locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) * mm_take_all_locks() can fail if it's interrupted by signals.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) int mm_take_all_locks(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) struct anon_vma_chain *avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) BUG_ON(mmap_read_trylock(mm));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) mutex_lock(&mm_all_locks_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) for (vma = mm->mmap; vma; vma = vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) if (signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) if (vma->vm_file && vma->vm_file->f_mapping &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) is_vm_hugetlb_page(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) vm_lock_mapping(mm, vma->vm_file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) for (vma = mm->mmap; vma; vma = vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) if (signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) if (vma->vm_file && vma->vm_file->f_mapping &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) !is_vm_hugetlb_page(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) vm_lock_mapping(mm, vma->vm_file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) for (vma = mm->mmap; vma; vma = vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) if (signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) if (vma->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) vm_lock_anon_vma(mm, avc->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) mm_drop_all_locks(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) }
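
/*
 * Calling pattern sketch (illustrative): mmu notifier registration is a
 * typical user and looks roughly like
 *
 *	mmap_write_lock(mm);
 *	ret = mm_take_all_locks(mm);
 *	if (ret)
 *		goto out_unlock;	// -EINTR: a signal was pending
 *	// ... publish the notifier while every vma lock is held ...
 *	mm_drop_all_locks(mm);
 * out_unlock:
 *	mmap_write_unlock(mm);
 */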
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) * The LSB of head.next can't change to 0 from under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) * us because we hold the mm_all_locks_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) * We must however clear the bitflag before unlocking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) * the vma so the users using the anon_vma->rb_root will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) * never see our bitflag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) * No need of atomic instructions here, head.next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) * can't change from under us until we release the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) * anon_vma->root->rwsem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) if (!__test_and_clear_bit(0, (unsigned long *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) &anon_vma->root->rb_root.rb_root.rb_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) anon_vma_unlock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) static void vm_unlock_mapping(struct address_space *mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) * AS_MM_ALL_LOCKS can't change to 0 from under us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) * because we hold the mm_all_locks_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) i_mmap_unlock_write(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) &mapping->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) * The mmap_lock cannot be released by the caller until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) * mm_drop_all_locks() returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) void mm_drop_all_locks(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) struct anon_vma_chain *avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) BUG_ON(mmap_read_trylock(mm));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) for (vma = mm->mmap; vma; vma = vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) if (vma->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) vm_unlock_anon_vma(avc->anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) if (vma->vm_file && vma->vm_file->f_mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) vm_unlock_mapping(vma->vm_file->f_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) mutex_unlock(&mm_all_locks_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) * initialise the percpu counter for VM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) void __init mmap_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) VM_BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) * Initialise sysctl_user_reserve_kbytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) * This is intended to prevent a single memory-hogging process from leaving
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) * the user unable to recover (kill the hog) in OVERCOMMIT_NEVER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) * mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) * The default value is min(3% of free memory, 128MB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) * 128MB is enough to recover with sshd/login, bash, and top/kill.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) static int init_user_reserve(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) unsigned long free_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) subsys_initcall(init_user_reserve);
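
/*
 * Worked example (illustrative): free_kbytes / 32 is ~3.125% of free
 * memory and 1UL << 17 kbytes is 128MB, so a machine with 2GB free at
 * boot gets min(65536, 131072) = 65536 kbytes (64MB), while anything with
 * more than 4GB free is capped at the full 128MB.
 */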
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) * Initialise sysctl_admin_reserve_kbytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) * to log in and kill a memory hogging process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) * Systems with more than 256MB will reserve 8MB, enough to recover
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) * only reserve 3% of free pages by default.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) static int init_admin_reserve(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) unsigned long free_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) subsys_initcall(init_admin_reserve);
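
/*
 * Worked example (illustrative): 1UL << 13 kbytes is 8MB, and
 * free_kbytes / 32 reaches 8MB once roughly 256MB is free, which is where
 * the "more than 256MB reserves 8MB" rule in the comment above comes from.
 */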
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) * Reinitialise user and admin reserves if memory is added or removed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) * The default user reserve max is 128MB, and the default max for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) * admin reserve is 8MB. These are usually, but not always, enough to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) * enable recovery from a memory hogging process using login/sshd, a shell,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) * and tools like top. It may make sense to increase or even disable the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) * reserve depending on the existence of swap or variations in the recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) * tools. So, the admin may have changed them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) * If memory is added and the reserves have been eliminated or increased above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) * the default max, then we'll trust the admin.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) * If memory is removed and there isn't enough free memory, then we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) * need to reset the reserves.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) * Otherwise keep the reserve set by the admin.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) static int reserve_mem_notifier(struct notifier_block *nb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) unsigned long action, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) unsigned long tmp, free_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) switch (action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) case MEM_ONLINE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) /* Default max is 128MB. Leave alone if modified by operator. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) tmp = sysctl_user_reserve_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) if (0 < tmp && tmp < (1UL << 17))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) init_user_reserve();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) /* Default max is 8MB. Leave alone if modified by operator. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) tmp = sysctl_admin_reserve_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) if (0 < tmp && tmp < (1UL << 13))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) init_admin_reserve();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) case MEM_OFFLINE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) if (sysctl_user_reserve_kbytes > free_kbytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) init_user_reserve();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) pr_info("vm.user_reserve_kbytes reset to %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) sysctl_user_reserve_kbytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) if (sysctl_admin_reserve_kbytes > free_kbytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) init_admin_reserve();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) pr_info("vm.admin_reserve_kbytes reset to %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) sysctl_admin_reserve_kbytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) return NOTIFY_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) static struct notifier_block reserve_mem_nb = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) .notifier_call = reserve_mem_notifier,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) static int __meminit init_reserve_notifier(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) if (register_hotmemory_notifier(&reserve_mem_nb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) pr_err("Failed registering memory add/remove notifier for admin reserve\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) subsys_initcall(init_reserve_notifier);