Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

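Below is mm/mremap.c as it appears in this tree; the blame data shows every line last touched by commit 8f3ce5b39 (kx, 2023-10-28 12:00:06 +0300).
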
// SPDX-License-Identifier: GPL-2.0
/*
 *	mm/mremap.c
 *
 *	(C) Copyright 1996 Linus Torvalds
 *
 *	Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *	(C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
#include <linux/userfaultfd_k.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "internal.h"

static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		return NULL;

	p4d = p4d_offset(pgd, addr);
	if (p4d_none_or_clear_bad(p4d))
		return NULL;

	pud = pud_offset(p4d, addr);
	if (pud_none_or_clear_bad(pud))
		return NULL;

	return pud;
}

static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = get_old_pud(mm, addr);
	if (!pud)
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;

	return pmd;
}

static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;

	return pud_alloc(mm, p4d, addr);
}

static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = alloc_new_pud(mm, vma, addr);
	if (!pud)
		return NULL;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;

	VM_BUG_ON(pmd_trans_huge(*pmd));

	return pmd;
}

static void take_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);
}

static void drop_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);
}

static pte_t move_soft_dirty_pte(pte_t pte)
{
	/*
	 * Set soft dirty bit so we can notice
	 * in userspace the ptes were moved.
	 */
#ifdef CONFIG_MEM_SOFT_DIRTY
	if (pte_present(pte))
		pte = pte_mksoft_dirty(pte);
	else if (is_swap_pte(pte))
		pte = pte_swp_mksoft_dirty(pte);
#endif
	return pte;
}
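
/*
 * Background note: with CONFIG_MEM_SOFT_DIRTY, the soft-dirty bit is the
 * per-pte change marker that userspace trackers (e.g. CRIU) read back via
 * /proc/<pid>/pagemap (bit 55) after clearing it through
 * /proc/<pid>/clear_refs.  Re-marking moved ptes here keeps a page that
 * merely changed virtual address from being reported as unchanged; see
 * Documentation/admin-guide/mm/soft-dirty.rst.
 */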

static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr, bool need_rmap_locks)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;
	spinlock_t *old_ptl, *new_ptl;
	bool force_flush = false;
	unsigned long len = old_end - old_addr;

	/*
	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
	 * locks to ensure that rmap will always observe either the old or the
	 * new ptes. This is the easiest way to avoid races with
	 * truncate_pagecache(), page migration, etc...
	 *
	 * When need_rmap_locks is false, we use other ways to avoid
	 * such races:
	 *
	 * - During exec() shift_arg_pages(), we use a specially tagged vma
	 *   which rmap call sites look for using vma_is_temporary_stack().
	 *
	 * - During mremap(), new_vma is often known to be placed after vma
	 *   in rmap traversal order. This ensures rmap will always observe
	 *   either the old pte, or the new pte, or both (the page table locks
	 *   serialize access to individual ptes, but only rmap traversal
	 *   order guarantees that we won't miss both the old and new ptes).
	 */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	/*
	 * We don't have to worry about the ordering of src and dst
	 * pte locks because exclusive mmap_lock prevents deadlock.
	 */
	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
	new_pte = pte_offset_map(new_pmd, new_addr);
	new_ptl = pte_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/*
		 * If we are remapping a valid PTE, make sure
		 * to flush TLB before we drop the PTL for the
		 * PTE.
		 *
		 * NOTE! Both old and new PTL matter: the old one
		 * for racing with page_mkclean(), the new one to
		 * make sure the physical page stays valid until
		 * the TLB entry for the old mapping has been
		 * flushed.
		 */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	arch_leave_lazy_mmu_mode();
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);
	if (need_rmap_locks)
		drop_rmap_locks(vma);
}

#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pmd_t pmd;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 *
	 * However, there's a case during execve() where we use mremap
	 * to move the initial stack, and in that case the target area
	 * may overlap the source area (always moving down).
	 *
	 * If everything is PMD-aligned, that works fine, as moving
	 * each pmd down will clear the source pmd. But if we first
	 * have a few 4kB-only pages that get moved down, and then
	 * hit the "now the rest is PMD-aligned, let's do everything
	 * one pmd at a time", we will still have the old (now empty
	 * of any 4kB pages, but still there) PMD in the page table
	 * tree.
	 *
	 * Warn on it once - because we really should try to figure
	 * out how to do this better - but then say "I won't move
	 * this pmd".
	 *
	 * One alternative might be to just unmap the target pmd at
	 * this point, and verify that it really is empty. We'll see.
	 */
	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pmd */
	pmd = *old_pmd;
	pmd_clear(old_pmd);

	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Set the new pmd */
	set_pmd_at(mm, new_addr, new_pmd, pmd);
	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pmd(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd,
		pmd_t *new_pmd)
{
	return false;
}
#endif

#ifdef CONFIG_HAVE_MOVE_PUD
static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pud_t pud;

	/*
	 * The destination pud shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON_ONCE(!pud_none(*new_pud)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pud_lock(vma->vm_mm, old_pud);
	new_ptl = pud_lockptr(mm, new_pud);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pud */
	pud = *old_pud;
	pud_clear(old_pud);

	VM_BUG_ON(!pud_none(*new_pud));

	/* Set the new pud */
	set_pud_at(mm, new_addr, new_pud, pud);
	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pud(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pud_t *old_pud,
		pud_t *new_pud)
{
	return false;
}
#endif

enum pgt_entry {
	NORMAL_PMD,
	HPAGE_PMD,
	NORMAL_PUD,
};

/*
 * Returns an extent of the corresponding size for the pgt_entry specified if
 * valid. Else returns a smaller extent bounded by the end of the source and
 * destination pgt_entry.
 */
static __always_inline unsigned long get_extent(enum pgt_entry entry,
			unsigned long old_addr, unsigned long old_end,
			unsigned long new_addr)
{
	unsigned long next, extent, mask, size;

	switch (entry) {
	case HPAGE_PMD:
	case NORMAL_PMD:
		mask = PMD_MASK;
		size = PMD_SIZE;
		break;
	case NORMAL_PUD:
		mask = PUD_MASK;
		size = PUD_SIZE;
		break;
	default:
		BUILD_BUG();
		break;
	}

	next = (old_addr + size) & mask;
	/* even if next overflowed, extent below will be ok */
	extent = next - old_addr;
	if (extent > old_end - old_addr)
		extent = old_end - old_addr;
	next = (new_addr + size) & mask;
	if (extent > next - new_addr)
		extent = next - new_addr;
	return extent;
}
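
/*
 * Worked example for get_extent() (illustrative numbers only, assuming
 * 4 KiB pages and 2 MiB PMDs): with old_addr = 0x1ff000, old_end = 0x800000
 * and new_addr = 0x400000, the next source PMD boundary is 0x200000, so
 * get_extent(NORMAL_PMD, ...) returns 0x1000 and only a single page is
 * handled that iteration; afterwards old_addr is PMD-aligned and later
 * steps are capped by the destination boundary or old_end instead.
 */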

/*
 * Attempts to speedup the move by moving entry at the level corresponding to
 * pgt_entry. Returns true if the move was successful, else false.
 */
static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
			unsigned long old_addr, unsigned long new_addr,
			void *old_entry, void *new_entry, bool need_rmap_locks)
{
	bool moved = false;

	/* See comment in move_ptes() */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	switch (entry) {
	case NORMAL_PMD:
		moved = move_normal_pmd(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case NORMAL_PUD:
		moved = move_normal_pud(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case HPAGE_PMD:
		moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
			move_huge_pmd(vma, old_addr, new_addr, old_entry,
				      new_entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (need_rmap_locks)
		drop_rmap_locks(vma);

	return moved;
}

unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks)
{
	unsigned long extent, old_end;
	struct mmu_notifier_range range;
	pmd_t *old_pmd, *new_pmd;

	old_end = old_addr + len;
	flush_cache_range(vma, old_addr, old_end);

	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
				old_addr, old_end);
	mmu_notifier_invalidate_range_start(&range);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		/*
		 * If extent is PUD-sized try to speed up the move by moving at the
		 * PUD level if possible.
		 */
		extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
		if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
			pud_t *old_pud, *new_pud;

			old_pud = get_old_pud(vma->vm_mm, old_addr);
			if (!old_pud)
				continue;
			new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
			if (!new_pud)
				break;
			if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
					   old_pud, new_pud, true))
				continue;
		}

		extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr);
		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
		if (!old_pmd)
			continue;
		new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
		if (!new_pmd)
			break;
		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
		    pmd_devmap(*old_pmd)) {
			if (extent == HPAGE_PMD_SIZE &&
			    move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
					   old_pmd, new_pmd, need_rmap_locks))
				continue;
			split_huge_pmd(vma, old_pmd, old_addr);
			if (pmd_trans_unstable(old_pmd))
				continue;
		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
			   extent == PMD_SIZE) {
			/*
			 * If the extent is PMD-sized, try to speed the move by
			 * moving at the PMD level if possible.
			 */
			if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
					   old_pmd, new_pmd, true))
				continue;
		}

		if (pte_alloc(new_vma->vm_mm, new_pmd))
			break;
		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
			  new_pmd, new_addr, need_rmap_locks);
	}

	mmu_notifier_invalidate_range_end(&range);

	return len + old_addr - old_end;	/* how much done */
}
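
/*
 * Note on the return value above: old_addr only advances by the extents
 * that were actually handled, so "len + old_addr - old_end" is how many
 * bytes of the old range were processed before the loop finished or bailed
 * out on an allocation failure.  move_vma() compares this against old_len
 * to detect a partial move and roll it back.
 */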
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) static unsigned long move_vma(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) 		unsigned long old_addr, unsigned long old_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) 		unsigned long new_len, unsigned long new_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) 		bool *locked, unsigned long flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) 		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) 	struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) 	struct vm_area_struct *new_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) 	unsigned long vm_flags = vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) 	unsigned long new_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) 	unsigned long moved_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) 	unsigned long excess = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) 	unsigned long hiwater_vm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) 	int split = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) 	bool need_rmap_locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) 	 * We'd prefer to avoid failure later on in do_munmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 	 * which may split one vma into three before unmapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) 	if (mm->map_count >= sysctl_max_map_count - 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) 	 * Advise KSM to break any KSM pages in the area to be moved:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) 	 * it would be confusing if they were to turn up at the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) 	 * location, where they happen to coincide with different KSM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) 	 * pages recently unmapped.  But leave vma->vm_flags as it was,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) 	 * so KSM can come around to merge on vma and new_vma afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) 	err = ksm_madvise(vma, old_addr, old_addr + old_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) 						MADV_UNMERGEABLE, &vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) 			   &need_rmap_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) 	if (!new_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) 	/* new_vma is returned protected by copy_vma, to prevent speculative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) 	 * page fault to be done in the destination area before we move the pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) 	 * Now, we must also protect the source VMA since we don't want pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) 	 * to be mapped in our back while we are copying the PTEs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) 	if (vma != new_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) 		vm_write_begin(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) 				     need_rmap_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) 	if (moved_len < old_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) 		err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) 	} else if (vma->vm_ops && vma->vm_ops->mremap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) 		err = vma->vm_ops->mremap(new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) 	if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) 		 * On error, move entries back from new area to old,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) 		 * which will succeed since page tables still there,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) 		 * and then proceed to unmap new area instead of old.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) 		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) 				 true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) 		if (vma != new_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) 			vm_write_end(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) 		vma = new_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) 		old_len = new_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) 		old_addr = new_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) 		new_addr = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) 		mremap_userfaultfd_prep(new_vma, uf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) 		arch_remap(mm, old_addr, old_addr + old_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) 			   new_addr, new_addr + new_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) 		if (vma != new_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) 			vm_write_end(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) 	vm_write_end(new_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) 	/* Conceal VM_ACCOUNT so old reservation is not undone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) 	if (vm_flags & VM_ACCOUNT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) 		vma->vm_flags &= ~VM_ACCOUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) 		excess = vma->vm_end - vma->vm_start - old_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) 		if (old_addr > vma->vm_start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) 		    old_addr + old_len < vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) 			split = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) 	 * If we failed to move page tables we still do total_vm increment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) 	 * since do_munmap() will decrement it by old_len == new_len.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) 	 * Since total_vm is about to be raised artificially high for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) 	 * moment, we need to restore high watermark afterwards: if stats
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) 	 * are taken meanwhile, total_vm and hiwater_vm appear too high.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) 	 * If this were a serious issue, we'd add a flag to do_munmap().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) 	hiwater_vm = mm->hiwater_vm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) 	vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) 	/* Tell pfnmap has moved from this vma */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) 	if (unlikely(vma->vm_flags & VM_PFNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) 		untrack_pfn_moved(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) 	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) 		if (vm_flags & VM_ACCOUNT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) 			/* Always put back VM_ACCOUNT since we won't unmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) 			vma->vm_flags |= VM_ACCOUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) 			vm_acct_memory(new_len >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) 		 * VMAs can actually be merged back together in copy_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) 		 * calling merge_vma. This can happen with anonymous vmas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) 		 * which have not yet been faulted, so if we were to consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) 		 * this VMA split we'll end up adding VM_ACCOUNT on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) 		 * next VMA, which is completely unrelated if this VMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) 		 * was re-merged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) 		if (split && new_vma == vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) 			split = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) 		/* We always clear VM_LOCKED[ONFAULT] on the old vma */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) 		vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) 		/* Because we won't unmap we don't need to touch locked_vm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) 		/* OOM: unable to split vma, just get accounts right */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) 		vm_unacct_memory(excess >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) 		excess = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) 	if (vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) 		mm->locked_vm += new_len >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) 		*locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) 	mm->hiwater_vm = hiwater_vm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) 	/* Restore VM_ACCOUNT if one or two pieces of vma left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) 	if (excess) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) 		vma->vm_flags |= VM_ACCOUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) 		if (split)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) 			vma->vm_next->vm_flags |= VM_ACCOUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) 	return new_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) static struct vm_area_struct *vma_to_resize(unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) 	unsigned long old_len, unsigned long new_len, unsigned long flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) 	unsigned long *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) 	struct vm_area_struct *vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) 	unsigned long pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) 	if (!vma || vma->vm_start > addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) 		return ERR_PTR(-EFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) 	 * !old_len is a special case where an attempt is made to 'duplicate'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) 	 * a mapping.  This makes no sense for private mappings as it will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) 	 * instead create a fresh/new mapping unrelated to the original.  This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) 	 * is contrary to the basic idea of mremap which creates new mappings
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) 	 * based on the original.  There are no known use cases for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) 	 * behavior.  As a result, fail such attempts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) 	if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) 		pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap.  This is not supported.\n", current->comm, current->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) 	if ((flags & MREMAP_DONTUNMAP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) 			(vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) 	if (is_vm_hugetlb_page(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) 	/* We can't remap across vm area boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) 	if (old_len > vma->vm_end - addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) 		return ERR_PTR(-EFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) 	if (new_len == old_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) 		return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) 	/* Need to be careful about a growing mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) 	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) 	pgoff += vma->vm_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) 	if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) 	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) 		return ERR_PTR(-EFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) 	if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) 		unsigned long locked, lock_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) 		locked = mm->locked_vm << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) 		lock_limit = rlimit(RLIMIT_MEMLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) 		locked += new_len - old_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) 			return ERR_PTR(-EAGAIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) 	if (!may_expand_vm(mm, vma->vm_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) 				(new_len - old_len) >> PAGE_SHIFT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) 	if (vma->vm_flags & VM_ACCOUNT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) 		unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) 		if (security_vm_enough_memory_mm(mm, charged))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) 			return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) 		*p = charged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) 	return vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) 		unsigned long new_addr, unsigned long new_len, bool *locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) 		unsigned long flags, struct vm_userfaultfd_ctx *uf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) 		struct list_head *uf_unmap_early,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) 		struct list_head *uf_unmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) 	struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) 	unsigned long ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) 	unsigned long charged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) 	unsigned long map_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) 	if (offset_in_page(new_addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) 	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) 	/* Ensure the old/new locations do not overlap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) 	if (addr + old_len > new_addr && new_addr + new_len > addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) 	 * move_vma() need us to stay 4 maps below the threshold, otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) 	 * it will bail out at the very beginning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) 	 * That is a problem if we have already unmaped the regions here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) 	 * (new_addr, and old_addr), because userspace will not know the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) 	 * state of the vma's after it gets -ENOMEM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) 	 * So, to avoid such scenario we can pre-compute if the whole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) 	 * operation has high chances to success map-wise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) 	 * Worst-scenario case is when both vma's (new_addr and old_addr) get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) 	 * split in 3 before unmaping it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) 	 * That means 2 more maps (1 for each) to the ones we already hold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) 	 * Check whether current map count plus 2 still leads us to 4 maps below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) 	 * the threshold, otherwise return -ENOMEM here to be more safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) 	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) 	if (flags & MREMAP_FIXED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) 		ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) 	if (old_len >= new_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) 		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) 		if (ret && old_len != new_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) 		old_len = new_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) 	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) 	if (IS_ERR(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) 		ret = PTR_ERR(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) 	/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) 	if (flags & MREMAP_DONTUNMAP &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) 		!may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) 	if (flags & MREMAP_FIXED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) 		map_flags |= MAP_FIXED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) 	if (vma->vm_flags & VM_MAYSHARE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) 		map_flags |= MAP_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) 	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) 				((addr - vma->vm_start) >> PAGE_SHIFT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) 				map_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) 	if (IS_ERR_VALUE(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) 		goto out1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) 	/* We got a new mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) 	if (!(flags & MREMAP_FIXED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) 		new_addr = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) 	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) 		       uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) 	if (!(offset_in_page(ret)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) out1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) 	vm_unacct_memory(charged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) }
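
The MREMAP_FIXED branch handled above (unmapping the destination early via uf_unmap_early, then calling get_unmapped_area() with MAP_FIXED) is what a userspace caller exercises by passing an explicit fifth argument to mremap(2). A minimal userspace sketch, assuming Linux with glibc's mremap() wrapper and the MREMAP_FIXED/MREMAP_MAYMOVE flags; the destination is taken from a throwaway PROT_NONE reservation rather than a hard-coded address:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	size_t len = 4 * (size_t)sysconf(_SC_PAGESIZE);

	/* Source mapping we want to relocate. */
	char *src = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED)
		return 1;
	memset(src, 0x42, len);

	/* Reserve a destination range so the fixed address is known-free. */
	void *dst_want = mmap(NULL, len, PROT_NONE,
			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst_want == MAP_FAILED)
		return 1;

	/*
	 * MREMAP_FIXED requires MREMAP_MAYMOVE; the kernel unmaps the
	 * destination range first and then moves the pages there.
	 */
	char *dst = mremap(src, len, len,
			   MREMAP_MAYMOVE | MREMAP_FIXED, dst_want);
	if (dst == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	printf("moved to %p, first byte = 0x%02x\n", (void *)dst, dst[0]);
	return 0;
}

Any existing mapping at the destination is discarded by the kernel, which is why the sketch reserves the target range itself before reusing it.
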
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) 	unsigned long end = vma->vm_end + delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) 	if (end < vma->vm_end) /* overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) 	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) 	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) 			      0, MAP_FIXED) & ~PAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) }
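
vma_expandable() above is the gatekeeper for the "can we just expand the current mapping?" path in the syscall that follows: growth in place is allowed only if the larger range does not overflow, does not run into the next VMA, and is still acceptable to the architecture's get_unmapped_area(). A minimal userspace sketch of the in-place case, assuming Linux with glibc's mremap(); the hole after the VMA is made deterministic by unmapping the tail of a larger reservation:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);

	/* Map two pages, then free the second to leave a hole behind. */
	char *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	if (munmap(p + page, page))
		return 1;

	/*
	 * No MREMAP_MAYMOVE: this can only succeed by growing in place,
	 * i.e. when the hole after the VMA is large enough.
	 */
	char *q = mremap(p, page, 2 * page, 0);
	if (q == MAP_FAILED) {
		perror("mremap (in place)");
		return 1;
	}

	printf("grew in place, address unchanged: %d\n", q == p);
	return 0;
}

If the hole were occupied, the same call would fail with ENOMEM rather than move the mapping, because MREMAP_MAYMOVE was not passed.
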
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)  * Expand (or shrink) an existing mapping, potentially moving it at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)  * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)  * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822)  * This option implies MREMAP_MAYMOVE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) 		unsigned long, new_len, unsigned long, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) 		unsigned long, new_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) 	struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) 	unsigned long ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) 	unsigned long charged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) 	bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) 	bool downgraded = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) 	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) 	LIST_HEAD(uf_unmap_early);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) 	LIST_HEAD(uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) 	 * There is a deliberate asymmetry here: we strip the pointer tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) 	 * from the old address but leave the new address alone. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) 	 * for consistency with mmap(), where we prevent the creation of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) 	 * aliasing mappings in userspace by leaving the tag bits of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) 	 * mapping address intact. A non-zero tag will cause the subsequent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) 	 * range checks to reject the address as invalid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) 	 * See Documentation/arm64/tagged-address-abi.rst for more information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) 	addr = untagged_addr(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) 	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) 	 * MREMAP_DONTUNMAP is always a move and it does not allow resizing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) 	 * in the process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) 	if (flags & MREMAP_DONTUNMAP &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) 			(!(flags & MREMAP_MAYMOVE) || old_len != new_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) 	if (offset_in_page(addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) 	old_len = PAGE_ALIGN(old_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) 	new_len = PAGE_ALIGN(new_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) 	 * We allow a zero old-len as a special case for the DOS-emu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) 	 * "duplicate shm area" trick (see the userspace sketch after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) 	 * this function). But a zero new-len is nonsensical.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) 	if (!new_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) 	if (mmap_write_lock_killable(current->mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) 		return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) 	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) 		ret = mremap_to(addr, old_len, new_addr, new_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) 				&locked, flags, &uf, &uf_unmap_early,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) 				&uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) 	 * Always allow a shrinking remap: that just unmaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) 	 * the unnecessary pages..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) 	 * __do_munmap does all the needed commit accounting, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) 	 * downgrades mmap_lock to read if so directed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) 	if (old_len >= new_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) 		int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) 		retval = __do_munmap(mm, addr+new_len, old_len - new_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) 				  &uf_unmap, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) 		if (retval < 0 && old_len != new_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) 			ret = retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) 		/* Returning 1 indicates mmap_lock is downgraded to read. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) 		} else if (retval == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) 			downgraded = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) 		ret = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) 	 * Ok, we need to grow..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) 	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) 	if (IS_ERR(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) 		ret = PTR_ERR(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) 	/* old_len extends exactly to the end of the area. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) 	if (old_len == vma->vm_end - addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) 		/* can we just expand the current mapping? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) 		if (vma_expandable(vma, new_len - old_len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) 			int pages = (new_len - old_len) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) 			if (vma_adjust(vma, vma->vm_start, addr + new_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) 				       vma->vm_pgoff, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) 				ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) 			vm_stat_account(mm, vma->vm_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) 			if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) 				mm->locked_vm += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) 				locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) 				new_addr = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) 			ret = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) 	 * We weren't able to just expand or shrink the area,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) 	 * we need to create a new one and move it..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) 	ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) 	if (flags & MREMAP_MAYMOVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) 		unsigned long map_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) 		if (vma->vm_flags & VM_MAYSHARE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) 			map_flags |= MAP_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) 		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) 					vma->vm_pgoff +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) 					((addr - vma->vm_start) >> PAGE_SHIFT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) 					map_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) 		if (IS_ERR_VALUE(new_addr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) 			ret = new_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) 			       &locked, flags, &uf, &uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) 	if (offset_in_page(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) 		vm_unacct_memory(charged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) 		locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) 	if (downgraded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) 		mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) 		mmap_write_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) 	if (locked && new_len > old_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) 		mm_populate(new_addr + old_len, new_len - old_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) 	userfaultfd_unmap_complete(mm, &uf_unmap_early);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) 	mremap_userfaultfd_complete(&uf, addr, ret, old_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) 	userfaultfd_unmap_complete(mm, &uf_unmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) }
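
The "zero old-len" special case noted in the comment above (kept for the DOS-emu "duplicate shm area" use) is still reachable from userspace: with old_size == 0 on a shared mapping, mremap() leaves the original mapping intact and maps the same pages a second time at a new address. A minimal sketch, assuming Linux with glibc's mremap(); MAP_SHARED matters because kernels of this vintage refuse to duplicate private mappings this way:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);

	/* A shared anonymous mapping; private ones cannot be duplicated. */
	char *a = mmap(NULL, page, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (a == MAP_FAILED)
		return 1;

	/*
	 * old_size == 0: nothing is unmapped; a second mapping of the
	 * same pages is created (MREMAP_MAYMOVE is needed because the
	 * new mapping has to live at a different address).
	 */
	char *b = mremap(a, 0, page, MREMAP_MAYMOVE);
	if (b == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	a[0] = 'x';
	printf("a=%p b=%p b[0]=%c\n", (void *)a, (void *)b, b[0]); /* prints 'x' */
	return 0;
}

Both addresses end up aliasing the same shared pages, so the write through a is visible through b.
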