// SPDX-License-Identifier: GPL-2.0
/*
 * mm/mremap.c
 *
 * (C) Copyright 1996 Linus Torvalds
 *
 * Address space accounting code <alan@lxorguk.ukuu.org.uk>
 * (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
#include <linux/userfaultfd_k.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "internal.h"

static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		return NULL;

	p4d = p4d_offset(pgd, addr);
	if (p4d_none_or_clear_bad(p4d))
		return NULL;

	pud = pud_offset(p4d, addr);
	if (pud_none_or_clear_bad(pud))
		return NULL;

	return pud;
}

static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = get_old_pud(mm, addr);
	if (!pud)
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;

	return pmd;
}

static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;

	return pud_alloc(mm, p4d, addr);
}

static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = alloc_new_pud(mm, vma, addr);
	if (!pud)
		return NULL;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;

	VM_BUG_ON(pmd_trans_huge(*pmd));

	return pmd;
}

static void take_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);
}

static void drop_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);
}

static pte_t move_soft_dirty_pte(pte_t pte)
{
	/*
	 * Set soft dirty bit so we can notice
	 * in userspace the ptes were moved.
	 */
#ifdef CONFIG_MEM_SOFT_DIRTY
	if (pte_present(pte))
		pte = pte_mksoft_dirty(pte);
	else if (is_swap_pte(pte))
		pte = pte_swp_mksoft_dirty(pte);
#endif
	return pte;
}
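
/*
 * Illustrative sketch (userspace, not kernel code): with CONFIG_MEM_SOFT_DIRTY
 * the bit set by move_soft_dirty_pte() above is what lets a checkpointing tool
 * notice that a range was remapped.  A typical sequence clears the soft-dirty
 * bits via /proc/<pid>/clear_refs and later tests bit 55 of the corresponding
 * /proc/<pid>/pagemap entry:
 *
 *	write(clear_refs_fd, "4", 1);		// clear soft-dirty bits
 *	...					// task mremap()s the range
 *	pread(pagemap_fd, &ent, 8, (va / page_size) * 8);
 *	if (ent & (1ULL << 55))			// pte is soft-dirty
 *		...				// page must be re-dumped
 */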

static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr, bool need_rmap_locks)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;
	spinlock_t *old_ptl, *new_ptl;
	bool force_flush = false;
	unsigned long len = old_end - old_addr;

	/*
	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
	 * locks to ensure that rmap will always observe either the old or the
	 * new ptes. This is the easiest way to avoid races with
	 * truncate_pagecache(), page migration, etc...
	 *
	 * When need_rmap_locks is false, we use other ways to avoid
	 * such races:
	 *
	 * - During exec() shift_arg_pages(), we use a specially tagged vma
	 *   which rmap call sites look for using vma_is_temporary_stack().
	 *
	 * - During mremap(), new_vma is often known to be placed after vma
	 *   in rmap traversal order. This ensures rmap will always observe
	 *   either the old pte, or the new pte, or both (the page table locks
	 *   serialize access to individual ptes, but only rmap traversal
	 *   order guarantees that we won't miss both the old and new ptes).
	 */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	/*
	 * We don't have to worry about the ordering of src and dst
	 * pte locks because exclusive mmap_lock prevents deadlock.
	 */
	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
	new_pte = pte_offset_map(new_pmd, new_addr);
	new_ptl = pte_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/*
		 * If we are remapping a valid PTE, make sure
		 * to flush TLB before we drop the PTL for the
		 * PTE.
		 *
		 * NOTE! Both old and new PTL matter: the old one
		 * for racing with page_mkclean(), the new one to
		 * make sure the physical page stays valid until
		 * the TLB entry for the old mapping has been
		 * flushed.
		 */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	arch_leave_lazy_mmu_mode();
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);
	if (need_rmap_locks)
		drop_rmap_locks(vma);
}

#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pmd_t pmd;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 *
	 * However, there's a case during execve() where we use mremap
	 * to move the initial stack, and in that case the target area
	 * may overlap the source area (always moving down).
	 *
	 * If everything is PMD-aligned, that works fine, as moving
	 * each pmd down will clear the source pmd. But if we first
	 * have a few 4kB-only pages that get moved down, and then
	 * hit the "now the rest is PMD-aligned, let's do everything
	 * one pmd at a time", we will still have the old (now empty
	 * of any 4kB pages, but still there) PMD in the page table
	 * tree.
	 *
	 * Warn on it once - because we really should try to figure
	 * out how to do this better - but then say "I won't move
	 * this pmd".
	 *
	 * One alternative might be to just unmap the target pmd at
	 * this point, and verify that it really is empty. We'll see.
	 */
	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pmd */
	pmd = *old_pmd;
	pmd_clear(old_pmd);

	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Set the new pmd */
	set_pmd_at(mm, new_addr, new_pmd, pmd);
	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pmd(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd,
		pmd_t *new_pmd)
{
	return false;
}
#endif

#ifdef CONFIG_HAVE_MOVE_PUD
static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pud_t pud;

	/*
	 * The destination pud shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON_ONCE(!pud_none(*new_pud)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pud_lock(vma->vm_mm, old_pud);
	new_ptl = pud_lockptr(mm, new_pud);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pud */
	pud = *old_pud;
	pud_clear(old_pud);

	VM_BUG_ON(!pud_none(*new_pud));

	/* Set the new pud */
	set_pud_at(mm, new_addr, new_pud, pud);
	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pud(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pud_t *old_pud,
		pud_t *new_pud)
{
	return false;
}
#endif

enum pgt_entry {
	NORMAL_PMD,
	HPAGE_PMD,
	NORMAL_PUD,
};

/*
 * Returns an extent of the corresponding size for the pgt_entry specified if
 * valid. Else returns a smaller extent bounded by the end of the source and
 * destination pgt_entry.
 */
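/*
 * Worked example (illustrative numbers, assuming 4 KiB pages and 2 MiB PMDs):
 * for NORMAL_PMD with old_addr == 0x1ff000, old_end == 0x800000 and
 * new_addr == 0x400000, the source is 0x1000 bytes short of its next PMD
 * boundary (0x200000), so the returned extent is a single 4 KiB page and
 * only that page is moved this iteration; the next iteration then starts
 * PMD-aligned on the source side.
 */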
static __always_inline unsigned long get_extent(enum pgt_entry entry,
			unsigned long old_addr, unsigned long old_end,
			unsigned long new_addr)
{
	unsigned long next, extent, mask, size;

	switch (entry) {
	case HPAGE_PMD:
	case NORMAL_PMD:
		mask = PMD_MASK;
		size = PMD_SIZE;
		break;
	case NORMAL_PUD:
		mask = PUD_MASK;
		size = PUD_SIZE;
		break;
	default:
		BUILD_BUG();
		break;
	}

	next = (old_addr + size) & mask;
	/* even if next overflowed, extent below will be ok */
	extent = next - old_addr;
	if (extent > old_end - old_addr)
		extent = old_end - old_addr;
	next = (new_addr + size) & mask;
	if (extent > next - new_addr)
		extent = next - new_addr;
	return extent;
}

/*
 * Attempts to speed up the move by moving the entry at the level corresponding
 * to pgt_entry. Returns true if the move was successful, else false.
 */
static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
			unsigned long old_addr, unsigned long new_addr,
			void *old_entry, void *new_entry, bool need_rmap_locks)
{
	bool moved = false;

	/* See comment in move_ptes() */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	switch (entry) {
	case NORMAL_PMD:
		moved = move_normal_pmd(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case NORMAL_PUD:
		moved = move_normal_pud(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case HPAGE_PMD:
		moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
			move_huge_pmd(vma, old_addr, new_addr, old_entry,
				      new_entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (need_rmap_locks)
		drop_rmap_locks(vma);

	return moved;
}

unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks)
{
	unsigned long extent, old_end;
	struct mmu_notifier_range range;
	pmd_t *old_pmd, *new_pmd;

	old_end = old_addr + len;
	flush_cache_range(vma, old_addr, old_end);

	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
				old_addr, old_end);
	mmu_notifier_invalidate_range_start(&range);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		/*
		 * If extent is PUD-sized try to speed up the move by moving at the
		 * PUD level if possible.
		 */
		extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
		if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
			pud_t *old_pud, *new_pud;

			old_pud = get_old_pud(vma->vm_mm, old_addr);
			if (!old_pud)
				continue;
			new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
			if (!new_pud)
				break;
			if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
					   old_pud, new_pud, true))
				continue;
		}

		extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr);
		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
		if (!old_pmd)
			continue;
		new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
		if (!new_pmd)
			break;
		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
		    pmd_devmap(*old_pmd)) {
			if (extent == HPAGE_PMD_SIZE &&
			    move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
					   old_pmd, new_pmd, need_rmap_locks))
				continue;
			split_huge_pmd(vma, old_pmd, old_addr);
			if (pmd_trans_unstable(old_pmd))
				continue;
		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
			   extent == PMD_SIZE) {
			/*
			 * If the extent is PMD-sized, try to speed the move by
			 * moving at the PMD level if possible.
			 */
			if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
					   old_pmd, new_pmd, true))
				continue;
		}

		if (pte_alloc(new_vma->vm_mm, new_pmd))
			break;
		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
			  new_pmd, new_addr, need_rmap_locks);
	}

	mmu_notifier_invalidate_range_end(&range);

	return len + old_addr - old_end;	/* how much done */
}
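
/*
 * Note on the return value (illustrative): if the loop above bails out after
 * copying only 6 MB of an 8 MB request (e.g. because a new pmd could not be
 * allocated), the expression above yields 6 MB.  The caller, move_vma(),
 * compares that against old_len and, on such a partial move, moves the
 * already-copied entries back and fails with -ENOMEM.
 */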

static unsigned long move_vma(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long old_len,
		unsigned long new_len, unsigned long new_addr,
		bool *locked, unsigned long flags,
		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma;
	unsigned long vm_flags = vma->vm_flags;
	unsigned long new_pgoff;
	unsigned long moved_len;
	unsigned long excess = 0;
	unsigned long hiwater_vm;
	int split = 0;
	int err;
	bool need_rmap_locks;

	/*
	 * We'd prefer to avoid failure later on in do_munmap:
	 * which may split one vma into three before unmapping.
	 */
	if (mm->map_count >= sysctl_max_map_count - 3)
		return -ENOMEM;

	/*
	 * Advise KSM to break any KSM pages in the area to be moved:
	 * it would be confusing if they were to turn up at the new
	 * location, where they happen to coincide with different KSM
	 * pages recently unmapped. But leave vma->vm_flags as it was,
	 * so KSM can come around to merge on vma and new_vma afterwards.
	 */
	err = ksm_madvise(vma, old_addr, old_addr + old_len,
			  MADV_UNMERGEABLE, &vm_flags);
	if (err)
		return err;

	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
			   &need_rmap_locks);
	if (!new_vma)
		return -ENOMEM;

	/*
	 * new_vma is returned protected by copy_vma, to prevent a speculative
	 * page fault from being handled in the destination area before we have
	 * moved the ptes.  Now we must also protect the source VMA, since we
	 * don't want pages to be mapped behind our back while we are copying
	 * the PTEs.
	 */
	if (vma != new_vma)
		vm_write_begin(vma);

	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
				     need_rmap_locks);
	if (moved_len < old_len) {
		err = -ENOMEM;
	} else if (vma->vm_ops && vma->vm_ops->mremap) {
		err = vma->vm_ops->mremap(new_vma);
	}

	if (unlikely(err)) {
		/*
		 * On error, move entries back from new area to old,
		 * which will succeed since page tables still there,
		 * and then proceed to unmap new area instead of old.
		 */
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
				 true);
		if (vma != new_vma)
			vm_write_end(vma);
		vma = new_vma;
		old_len = new_len;
		old_addr = new_addr;
		new_addr = err;
	} else {
		mremap_userfaultfd_prep(new_vma, uf);
		arch_remap(mm, old_addr, old_addr + old_len,
			   new_addr, new_addr + new_len);
		if (vma != new_vma)
			vm_write_end(vma);
	}
	vm_write_end(new_vma);

	/* Conceal VM_ACCOUNT so old reservation is not undone */
	if (vm_flags & VM_ACCOUNT) {
		vma->vm_flags &= ~VM_ACCOUNT;
		excess = vma->vm_end - vma->vm_start - old_len;
		if (old_addr > vma->vm_start &&
		    old_addr + old_len < vma->vm_end)
			split = 1;
	}

	/*
	 * If we failed to move page tables we still do total_vm increment
	 * since do_munmap() will decrement it by old_len == new_len.
	 *
	 * Since total_vm is about to be raised artificially high for a
	 * moment, we need to restore high watermark afterwards: if stats
	 * are taken meanwhile, total_vm and hiwater_vm appear too high.
	 * If this were a serious issue, we'd add a flag to do_munmap().
	 */
	hiwater_vm = mm->hiwater_vm;
	vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);

	/* Tell the pfn tracking code that the pfnmap has moved from this vma */
	if (unlikely(vma->vm_flags & VM_PFNMAP))
		untrack_pfn_moved(vma);

	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
		if (vm_flags & VM_ACCOUNT) {
			/* Always put back VM_ACCOUNT since we won't unmap */
			vma->vm_flags |= VM_ACCOUNT;

			vm_acct_memory(new_len >> PAGE_SHIFT);
		}

		/*
		 * VMAs can actually be merged back together in copy_vma()
		 * calling vma_merge(). This can happen with anonymous vmas
		 * which have not yet been faulted, so if we were to consider
		 * this VMA split we'll end up adding VM_ACCOUNT on the
		 * next VMA, which is completely unrelated if this VMA
		 * was re-merged.
		 */
		if (split && new_vma == vma)
			split = 0;

		/* We always clear VM_LOCKED[ONFAULT] on the old vma */
		vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

		/* Because we won't unmap we don't need to touch locked_vm */
		goto out;
	}

	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
		/* OOM: unable to split vma, just get accounts right */
		vm_unacct_memory(excess >> PAGE_SHIFT);
		excess = 0;
	}

	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += new_len >> PAGE_SHIFT;
		*locked = true;
	}
out:
	mm->hiwater_vm = hiwater_vm;

	/* Restore VM_ACCOUNT if one or two pieces of vma left */
	if (excess) {
		vma->vm_flags |= VM_ACCOUNT;
		if (split)
			vma->vm_next->vm_flags |= VM_ACCOUNT;
	}

	return new_addr;
}

static struct vm_area_struct *vma_to_resize(unsigned long addr,
	unsigned long old_len, unsigned long new_len, unsigned long flags,
	unsigned long *p)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = find_vma(mm, addr);
	unsigned long pgoff;

	if (!vma || vma->vm_start > addr)
		return ERR_PTR(-EFAULT);

	/*
	 * !old_len is a special case where an attempt is made to 'duplicate'
	 * a mapping. This makes no sense for private mappings as it will
	 * instead create a fresh/new mapping unrelated to the original. This
	 * is contrary to the basic idea of mremap which creates new mappings
	 * based on the original. There are no known use cases for this
	 * behavior. As a result, fail such attempts.
	 */
	if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
		pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
		return ERR_PTR(-EINVAL);
	}

	if ((flags & MREMAP_DONTUNMAP) &&
			(vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
		return ERR_PTR(-EINVAL);

	if (is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		return ERR_PTR(-EFAULT);

	if (new_len == old_len)
		return vma;

	/* Need to be careful about a growing mapping */
	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
	pgoff += vma->vm_pgoff;
	if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
		return ERR_PTR(-EINVAL);

	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
		return ERR_PTR(-EFAULT);

	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = mm->locked_vm << PAGE_SHIFT;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		locked += new_len - old_len;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return ERR_PTR(-EAGAIN);
	}
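
	/*
	 * Illustrative example (the actual limit is configuration dependent):
	 * with a common RLIMIT_MEMLOCK soft limit of 64 KiB and no
	 * CAP_IPC_LOCK, a task whose locked_vm already sits at that limit
	 * cannot grow a VM_LOCKED mapping here at all; the check above
	 * fails with -EAGAIN rather than silently exceeding the limit.
	 */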

	if (!may_expand_vm(mm, vma->vm_flags,
				(new_len - old_len) >> PAGE_SHIFT))
		return ERR_PTR(-ENOMEM);

	if (vma->vm_flags & VM_ACCOUNT) {
		unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return ERR_PTR(-ENOMEM);
		*p = charged;
	}

	return vma;
}

static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
		unsigned long new_addr, unsigned long new_len, bool *locked,
		unsigned long flags, struct vm_userfaultfd_ctx *uf,
		struct list_head *uf_unmap_early,
		struct list_head *uf_unmap)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	unsigned long map_flags = 0;

	if (offset_in_page(new_addr))
		goto out;

	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
		goto out;

	/* Ensure the old/new locations do not overlap */
	if (addr + old_len > new_addr && new_addr + new_len > addr)
		goto out;

	/*
	 * move_vma() needs us to stay 4 maps below the threshold, otherwise
	 * it will bail out at the very beginning.
	 * That is a problem if we have already unmapped the regions here
	 * (new_addr, and old_addr), because userspace will not know the
	 * state of the vmas after it gets -ENOMEM.
	 * So, to avoid such a scenario we can pre-compute if the whole
	 * operation has a high chance of succeeding map-wise.
	 * The worst case is when both vmas (at new_addr and old_addr) get
	 * split in 3 before unmapping them.
	 * That means 2 more maps (1 for each) on top of the ones we already
	 * hold.
	 * Check whether the current map count plus 2 still leaves us 4 maps
	 * below the threshold, otherwise return -ENOMEM here to be safe.
	 */
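	/*
	 * For example, with the default sysctl_max_map_count of 65530 (an
	 * assumption; the sysctl is tunable), a process already holding
	 * 65525 mappings gets -ENOMEM here: 65525 + 2 == 65527 is not below
	 * 65530 - 3 == 65527, so the move is refused up front instead of
	 * failing halfway through.
	 */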
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) if (flags & MREMAP_FIXED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) if (old_len >= new_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) if (ret && old_len != new_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) old_len = new_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) if (IS_ERR(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) ret = PTR_ERR(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) /* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) if (flags & MREMAP_DONTUNMAP &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) !may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) }

	if (flags & MREMAP_FIXED)
		map_flags |= MAP_FIXED;

	if (vma->vm_flags & VM_MAYSHARE)
		map_flags |= MAP_SHARED;

	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
				((addr - vma->vm_start) >> PAGE_SHIFT),
				map_flags);
	if (IS_ERR_VALUE(ret))
		goto out1;

	/* We got a new mapping */
	if (!(flags & MREMAP_FIXED))
		new_addr = ret;

	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
		       uf_unmap);

	if (!(offset_in_page(ret)))
		goto out;

out1:
	vm_unacct_memory(charged);

out:
	return ret;
}
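
/*
 * For illustration (glibc wrapper, hypothetical values): a call such as
 *
 *	mremap(old, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, new_addr)
 *
 * takes the mremap_to() path above. The destination range is unmapped
 * first, so an existing mapping at new_addr is silently replaced, much
 * like MAP_FIXED does for mmap().
 */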

static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
{
	unsigned long end = vma->vm_end + delta;
	if (end < vma->vm_end) /* overflow */
		return 0;
	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
		return 0;
	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
			      0, MAP_FIXED) & ~PAGE_MASK)
		return 0;
	return 1;
}
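
/*
 * Hypothetical layout: a VMA covering [0x7f0000000000, 0x7f0000004000)
 * whose next VMA starts at 0x7f0000008000 can grow in place by at most
 * 0x4000 bytes (provided get_unmapped_area() also accepts the enlarged
 * fixed range); any larger delta trips the intersection check above and
 * forces mremap() onto the move path instead.
 */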

/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
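/*
 * For example, the common "grow the mapping, moving it if necessary"
 * pattern from userspace is
 *
 *	new_p = mremap(p, old_size, new_size, MREMAP_MAYMOVE);
 *
 * which either extends the mapping in place (returning p), moves it to a
 * kernel-chosen address, or fails with -ENOMEM when neither is possible.
 */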
SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
		unsigned long, new_len, unsigned long, flags,
		unsigned long, new_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	bool locked = false;
	bool downgraded = false;
	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
	LIST_HEAD(uf_unmap_early);
	LIST_HEAD(uf_unmap);

	/*
	 * There is a deliberate asymmetry here: we strip the pointer tag
	 * from the old address but leave the new address alone. This is
	 * for consistency with mmap(), where we prevent the creation of
	 * aliasing mappings in userspace by leaving the tag bits of the
	 * mapping address intact. A non-zero tag will cause the subsequent
	 * range checks to reject the address as invalid.
	 *
	 * See Documentation/arm64/tagged-address-abi.rst for more information.
	 */
	addr = untagged_addr(addr);
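
	/*
	 * E.g. on arm64 with the tagged address ABI, an old address of
	 * 0x0b00ffffa8765000 (tag 0x0b in bits 63:56) becomes
	 * 0x0000ffffa8765000 here, whereas a tagged new_addr is simply
	 * rejected by the later range checks (the addresses above are
	 * illustrative only).
	 */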

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
		return ret;

	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
		return ret;

	/*
	 * MREMAP_DONTUNMAP is always a move and it does not allow resizing
	 * in the process.
	 */
	if (flags & MREMAP_DONTUNMAP &&
		(!(flags & MREMAP_MAYMOVE) || old_len != new_len))
		return ret;
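
	/*
	 * To illustrate the flag rules above (raw syscall arguments, purely
	 * as examples):
	 *
	 *	mremap(p, len, len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0)
	 *		is accepted: a pure move that leaves the old range
	 *		mapped;
	 *	mremap(p, len, 2 * len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0)
	 *		is rejected with -EINVAL, since MREMAP_DONTUNMAP
	 *		forbids resizing.
	 */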

	if (offset_in_page(addr))
		return ret;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/*
	 * We allow a zero old-len as a special case
	 * for the DOS-emu "duplicate shm area" trick. But
	 * a zero new-len is nonsensical.
	 */
	if (!new_len)
		return ret;
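
	/*
	 * The zero old-len special case in practice: e.g.
	 *
	 *	mremap(shm_addr, 0, len, MREMAP_MAYMOVE, 0)
	 *
	 * on a MAP_SHARED region creates a second mapping of the same pages
	 * at a kernel-chosen address while leaving the original mapping
	 * intact (vma_to_resize() rejects this for private mappings).
	 */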

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
		ret = mremap_to(addr, old_len, new_addr, new_len,
				&locked, flags, &uf, &uf_unmap_early,
				&uf_unmap);
		goto out;
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages.
	 * __do_munmap does all the needed commit accounting, and
	 * downgrades mmap_lock to read if so directed.
	 */
	if (old_len >= new_len) {
		int retval;

		retval = __do_munmap(mm, addr+new_len, old_len - new_len,
				     &uf_unmap, true);
		if (retval < 0 && old_len != new_len) {
			ret = retval;
			goto out;
		/* Returning 1 indicates mmap_lock is downgraded to read. */
		} else if (retval == 1)
			downgraded = true;
		ret = addr;
		goto out;
	}
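
	/*
	 * E.g. shrinking a four-page anonymous mapping to two pages,
	 *
	 *	mremap(p, 4 * PAGE_SIZE, 2 * PAGE_SIZE, 0, 0),
	 *
	 * just unmaps the last two pages and returns p, with mmap_lock
	 * possibly downgraded to read mode by __do_munmap() (tracked via
	 * 'downgraded' above).
	 */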

	/*
	 * OK, we need to grow.
	 */
	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* old_len extends exactly to the end of the area. */
	if (old_len == vma->vm_end - addr) {
		/* can we just expand the current mapping? */
		if (vma_expandable(vma, new_len - old_len)) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;

			if (vma_adjust(vma, vma->vm_start, addr + new_len,
				       vma->vm_pgoff, NULL)) {
				ret = -ENOMEM;
				goto out;
			}

			vm_stat_account(mm, vma->vm_flags, pages);
			if (vma->vm_flags & VM_LOCKED) {
				mm->locked_vm += pages;
				locked = true;
				new_addr = addr;
			}
			ret = addr;
			goto out;
		}
	}
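
	/*
	 * In the in-place case above, e.g. growing an 8 KiB mapping that
	 * spans a whole VMA to 16 KiB (4 KiB pages assumed), vma_adjust()
	 * merely pushes vma->vm_end up to addr + new_len; nothing is copied
	 * or moved, and the new tail is populated later on demand (or right
	 * away via mm_populate() if the VMA is mlocked).
	 */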

	/*
	 * We weren't able to just expand or shrink the area,
	 * so we need to create a new one and move it.
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		unsigned long map_flags = 0;
		if (vma->vm_flags & VM_MAYSHARE)
			map_flags |= MAP_SHARED;

		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
					vma->vm_pgoff +
					((addr - vma->vm_start) >> PAGE_SHIFT),
					map_flags);
		if (IS_ERR_VALUE(new_addr)) {
			ret = new_addr;
			goto out;
		}

		ret = move_vma(vma, addr, old_len, new_len, new_addr,
			       &locked, flags, &uf, &uf_unmap);
	}
out:
	if (offset_in_page(ret)) {
		vm_unacct_memory(charged);
		locked = false;
	}
	if (downgraded)
		mmap_read_unlock(current->mm);
	else
		mmap_write_unlock(current->mm);
	if (locked && new_len > old_len)
		mm_populate(new_addr + old_len, new_len - old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap_early);
	mremap_userfaultfd_complete(&uf, addr, ret, old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap);
	return ret;
}