Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) #include <linux/pagewalk.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3) #include <linux/hugetlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4) #include <linux/bitops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5) #include <linux/mmu_notifier.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6) #include <asm/cacheflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7) #include <asm/tlbflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  * struct wp_walk - Private struct for pagetable walk callbacks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  * @range: Range for mmu notifiers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  * @tlbflush_start: Address of first modified pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  * @tlbflush_end: Address of last modified pte + 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  * @total: Total number of modified ptes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) struct wp_walk {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) 	struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) 	unsigned long tlbflush_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) 	unsigned long tlbflush_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) 	unsigned long total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24)  * wp_pte - Write-protect a pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25)  * @pte: Pointer to the pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26)  * @addr: The virtual page address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27)  * @walk: pagetable walk callback argument
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29)  * The function write-protects a pte and records the range in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30)  * virtual address space of touched ptes for efficient range TLB flushes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 		  struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	struct wp_walk *wpwalk = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	pte_t ptent = *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 	if (pte_write(ptent)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 		pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 		ptent = pte_wrprotect(old_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 		ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 		wpwalk->total++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 		wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 		wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 					   addr + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53)  * struct clean_walk - Private struct for the clean_record_pte function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54)  * @base: struct wp_walk we derive from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55)  * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56)  * @bitmap: Bitmap with one bit for each page offset in the address_space range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57)  * covered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58)  * @start: Address_space page offset of first modified pte relative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59)  * to @bitmap_pgoff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60)  * @end: Address_space page offset of last modified pte relative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61)  * to @bitmap_pgoff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) struct clean_walk {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	struct wp_walk base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	pgoff_t bitmap_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	unsigned long *bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	pgoff_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	pgoff_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) #define to_clean_walk(_wpwalk) container_of(_wpwalk, struct clean_walk, base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74)  * clean_record_pte - Clean a pte and record its address space offset in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75)  * bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76)  * @pte: Pointer to the pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77)  * @addr: The virtual page address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78)  * @walk: pagetable walk callback argument
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80)  * The function cleans a pte and records the range in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81)  * virtual address space of touched ptes for efficient TLB flushes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82)  * It also records dirty ptes in a bitmap representing page offsets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83)  * in the address_space, as well as the first and last of the bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84)  * touched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) static int clean_record_pte(pte_t *pte, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 			    unsigned long end, struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 	struct wp_walk *wpwalk = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	struct clean_walk *cwalk = to_clean_walk(wpwalk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	pte_t ptent = *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	if (pte_dirty(ptent)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 		pgoff_t pgoff = ((addr - walk->vma->vm_start) >> PAGE_SHIFT) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 			walk->vma->vm_pgoff - cwalk->bitmap_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 		pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 		ptent = pte_mkclean(old_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 		ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 		wpwalk->total++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 		wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 		wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 					   addr + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 		__set_bit(pgoff, cwalk->bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 		cwalk->start = min(cwalk->start, pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 		cwalk->end = max(cwalk->end, pgoff + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)  * wp_clean_pmd_entry - The pagewalk pmd callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)  * Dirty-tracking should take place on the PTE level, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)  * WARN() if encountering a dirty huge pmd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119)  * Furthermore, never split huge pmds, since that currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)  * causes dirty info loss. The pagefault handler should do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)  * that if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 			      struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	pmd_t pmdval = pmd_read_atomic(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	if (!pmd_trans_unstable(&pmdval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	if (pmd_none(pmdval)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 		walk->action = ACTION_AGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	/* Huge pmd, present or migrated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	walk->action = ACTION_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 		WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)  * wp_clean_pud_entry - The pagewalk pud callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)  * Dirty-tracking should take place on the PTE level, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)  * WARN() if encountering a dirty huge puds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)  * Furthermore, never split huge puds, since that currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)  * causes dirty info loss. The pagefault handler should do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)  * that if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 			      struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 	pud_t pudval = READ_ONCE(*pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	if (!pud_trans_unstable(&pudval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	if (pud_none(pudval)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 		walk->action = ACTION_AGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	/* Huge pud */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	walk->action = ACTION_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	if (pud_trans_huge(pudval) || pud_devmap(pudval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 		WARN_ON(pud_write(pudval) || pud_dirty(pudval));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)  * wp_clean_pre_vma - The pagewalk pre_vma callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)  * The pre_vma callback performs the cache flush, stages the tlb flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)  * and calls the necessary mmu notifiers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) static int wp_clean_pre_vma(unsigned long start, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 			    struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 	struct wp_walk *wpwalk = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 	wpwalk->tlbflush_start = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 	wpwalk->tlbflush_end = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	mmu_notifier_range_init(&wpwalk->range, MMU_NOTIFY_PROTECTION_PAGE, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 				walk->vma, walk->mm, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	mmu_notifier_invalidate_range_start(&wpwalk->range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	flush_cache_range(walk->vma, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	 * We're not using tlb_gather_mmu() since typically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	 * only a small subrange of PTEs are affected, whereas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 	 * tlb_gather_mmu() records the full range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	inc_tlb_flush_pending(walk->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)  * wp_clean_post_vma - The pagewalk post_vma callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)  * The post_vma callback performs the tlb flush and calls necessary mmu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)  * notifiers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) static void wp_clean_post_vma(struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	struct wp_walk *wpwalk = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	if (mm_tlb_flush_nested(walk->mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 		flush_tlb_range(walk->vma, wpwalk->range.start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 				wpwalk->range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	else if (wpwalk->tlbflush_end > wpwalk->tlbflush_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 		flush_tlb_range(walk->vma, wpwalk->tlbflush_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 				wpwalk->tlbflush_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 	mmu_notifier_invalidate_range_end(&wpwalk->range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	dec_tlb_flush_pending(walk->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)  * wp_clean_test_walk - The pagewalk test_walk callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)  * Won't perform dirty-tracking on COW, read-only or HUGETLB vmas.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) static int wp_clean_test_walk(unsigned long start, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 			      struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 	unsigned long vm_flags = READ_ONCE(walk->vma->vm_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	/* Skip non-applicable VMAs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	if ((vm_flags & (VM_SHARED | VM_MAYWRITE | VM_HUGETLB)) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 	    (VM_SHARED | VM_MAYWRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) static const struct mm_walk_ops clean_walk_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	.pte_entry = clean_record_pte,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	.pmd_entry = wp_clean_pmd_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	.pud_entry = wp_clean_pud_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	.test_walk = wp_clean_test_walk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	.pre_vma = wp_clean_pre_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	.post_vma = wp_clean_post_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) static const struct mm_walk_ops wp_walk_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	.pte_entry = wp_pte,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	.pmd_entry = wp_clean_pmd_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	.pud_entry = wp_clean_pud_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	.test_walk = wp_clean_test_walk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 	.pre_vma = wp_clean_pre_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	.post_vma = wp_clean_post_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261)  * wp_shared_mapping_range - Write-protect all ptes in an address space range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)  * @mapping: The address_space we want to write protect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)  * @first_index: The first page offset in the range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)  * @nr: Number of incremental page offsets to cover
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)  * Note: This function currently skips transhuge page-table entries, since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)  * it's intended for dirty-tracking on the PTE level. It will warn on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)  * encountering transhuge write-enabled entries, though, and can easily be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)  * extended to handle them as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)  * Return: The number of ptes actually write-protected. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)  * already write-protected ptes are not counted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) unsigned long wp_shared_mapping_range(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 				      pgoff_t first_index, pgoff_t nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 	struct wp_walk wpwalk = { .total = 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 	i_mmap_lock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	WARN_ON(walk_page_mapping(mapping, first_index, nr, &wp_walk_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 				  &wpwalk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	i_mmap_unlock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 	return wpwalk.total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) EXPORT_SYMBOL_GPL(wp_shared_mapping_range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)  * clean_record_shared_mapping_range - Clean and record all ptes in an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)  * address space range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)  * @mapping: The address_space we want to clean
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)  * @first_index: The first page offset in the range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)  * @nr: Number of incremental page offsets to cover
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)  * @bitmap_pgoff: The page offset of the first bit in @bitmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)  * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)  * cover the whole range @first_index..@first_index + @nr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)  * @start: Pointer to number of the first set bit in @bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)  * is modified as new bits are set by the function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)  * @end: Pointer to the number of the last set bit in @bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300)  * none set. The value is modified as new bits are set by the function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)  * Note: When this function returns there is no guarantee that a CPU has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)  * not already dirtied new ptes. However it will not clean any ptes not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)  * reported in the bitmap. The guarantees are as follows:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)  * a) All ptes dirty when the function starts executing will end up recorded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306)  *    in the bitmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)  * b) All ptes dirtied after that will either remain dirty, be recorded in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)  *    bitmap or both.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)  * If a caller needs to make sure all dirty ptes are picked up and none
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)  * additional are added, it first needs to write-protect the address-space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)  * range and make sure new writers are blocked in page_mkwrite() or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)  * pfn_mkwrite(). And then after a TLB flush following the write-protection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)  * pick up all dirty bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)  * Note: This function currently skips transhuge page-table entries, since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317)  * it's intended for dirty-tracking on the PTE level. It will warn on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318)  * encountering transhuge dirty entries, though, and can easily be extended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319)  * to handle them as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)  * Return: The number of dirty ptes actually cleaned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 						pgoff_t first_index, pgoff_t nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 						pgoff_t bitmap_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 						unsigned long *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 						pgoff_t *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 						pgoff_t *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 	bool none_set = (*start >= *end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	struct clean_walk cwalk = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 		.base = { .total = 0 },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 		.bitmap_pgoff = bitmap_pgoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 		.bitmap = bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 		.start = none_set ? nr : *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 		.end = none_set ? 0 : *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	i_mmap_lock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 	WARN_ON(walk_page_mapping(mapping, first_index, nr, &clean_walk_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 				  &cwalk.base));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 	i_mmap_unlock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 	*start = cwalk.start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 	*end = cwalk.end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 	return cwalk.base.total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) EXPORT_SYMBOL_GPL(clean_record_shared_mapping_range);