// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/bitops.h>
#include <linux/mmu_notifier.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/**
 * struct wp_walk - Private struct for pagetable walk callbacks
 * @range: Range for mmu notifiers
 * @tlbflush_start: Address of first modified pte
 * @tlbflush_end: Address of last modified pte + 1
 * @total: Total number of modified ptes
 */
struct wp_walk {
	struct mmu_notifier_range range;
	unsigned long tlbflush_start;
	unsigned long tlbflush_end;
	unsigned long total;
};

/**
 * wp_pte - Write-protect a pte
 * @pte: Pointer to the pte
 * @addr: The virtual page address
 * @end: End of the virtual address range (unused by this callback)
 * @walk: pagetable walk callback argument
 *
 * The function write-protects a pte and records the range in
 * virtual address space of touched ptes for efficient range TLB flushes.
 */
static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
		  struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;
	pte_t ptent = *pte;

	if (pte_write(ptent)) {
		pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);

		ptent = pte_wrprotect(old_pte);
		ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
		wpwalk->total++;
		wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
		wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
					   addr + PAGE_SIZE);
	}

	return 0;
}
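
/*
 * Illustrative sketch (values made up): after wp_clean_pre_vma() has
 * initialized the window to the empty, inverted range [end, start), each
 * write-protected pte widens it just enough to cover itself. Walking
 * [0x1000, 0x8000) and write-protecting only the ptes at 0x3000 and 0x5000
 * leaves, assuming 4 KiB pages,
 *
 *	tlbflush_start == 0x3000;
 *	tlbflush_end   == 0x6000;	// 0x5000 + PAGE_SIZE
 *
 * so wp_clean_post_vma() can flush three pages instead of seven.
 */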

/**
 * struct clean_walk - Private struct for the clean_record_pte function.
 * @base: struct wp_walk we derive from
 * @bitmap_pgoff: Address_space page offset of the first bit in @bitmap
 * @bitmap: Bitmap with one bit for each page offset in the address_space range
 * covered.
 * @start: Address_space page offset of first modified pte relative
 * to @bitmap_pgoff
 * @end: Address_space page offset of last modified pte + 1, relative
 * to @bitmap_pgoff
 */
struct clean_walk {
	struct wp_walk base;
	pgoff_t bitmap_pgoff;
	unsigned long *bitmap;
	pgoff_t start;
	pgoff_t end;
};

#define to_clean_walk(_wpwalk) container_of(_wpwalk, struct clean_walk, base)
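
/*
 * A short sketch of the derived-walk pattern above (illustrative only):
 * the pagewalk is handed &cwalk.base as its private data, the callbacks
 * receive it back through walk->private, and container_of() recovers the
 * enclosing struct clean_walk:
 *
 *	struct clean_walk cwalk = { .base = { .total = 0 } };
 *	struct wp_walk *wpwalk = &cwalk.base;	// what walk->private holds
 *
 *	to_clean_walk(wpwalk) == &cwalk;	// recovered in the callback
 */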

/**
 * clean_record_pte - Clean a pte and record its address space offset in a
 * bitmap
 * @pte: Pointer to the pte
 * @addr: The virtual page address
 * @end: End of the virtual address range (unused by this callback)
 * @walk: pagetable walk callback argument
 *
 * The function cleans a pte and records the range in
 * virtual address space of touched ptes for efficient TLB flushes.
 * It also records dirty ptes in a bitmap representing page offsets
 * in the address_space, as well as the first and last of the bits
 * touched.
 */
static int clean_record_pte(pte_t *pte, unsigned long addr,
			    unsigned long end, struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;
	struct clean_walk *cwalk = to_clean_walk(wpwalk);
	pte_t ptent = *pte;

	if (pte_dirty(ptent)) {
		pgoff_t pgoff = ((addr - walk->vma->vm_start) >> PAGE_SHIFT) +
			walk->vma->vm_pgoff - cwalk->bitmap_pgoff;
		pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);

		ptent = pte_mkclean(old_pte);
		ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);

		wpwalk->total++;
		wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
		wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
					   addr + PAGE_SIZE);

		__set_bit(pgoff, cwalk->bitmap);
		cwalk->start = min(cwalk->start, pgoff);
		cwalk->end = max(cwalk->end, pgoff + 1);
	}

	return 0;
}
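
/*
 * Worked example of the pgoff arithmetic above (illustrative values only,
 * assuming a 4 KiB PAGE_SIZE): for a vma with vm_start == 0x7f0000000000
 * and vm_pgoff == 16, a dirty pte at addr == 0x7f0000002000 and a bitmap
 * starting at bitmap_pgoff == 16 yield
 *
 *	pgoff = (0x2000 >> PAGE_SHIFT) + 16 - 16 = 2,
 *
 * i.e. bit 2 of the bitmap, the third page of the tracked range.
 */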

/*
 * wp_clean_pmd_entry - The pagewalk pmd callback.
 *
 * Dirty-tracking should take place on the PTE level, so
 * WARN() if encountering a dirty huge pmd.
 * Furthermore, never split huge pmds, since that currently
 * causes dirty info loss. The pagefault handler should do
 * that if needed.
 */
static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	pmd_t pmdval = pmd_read_atomic(pmd);

	if (!pmd_trans_unstable(&pmdval))
		return 0;

	if (pmd_none(pmdval)) {
		walk->action = ACTION_AGAIN;
		return 0;
	}

	/* Huge pmd, present or migrated */
	walk->action = ACTION_CONTINUE;
	if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval))
		WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval));

	return 0;
}

/*
 * wp_clean_pud_entry - The pagewalk pud callback.
 *
 * Dirty-tracking should take place on the PTE level, so
 * WARN() if encountering a dirty huge pud.
 * Furthermore, never split huge puds, since that currently
 * causes dirty info loss. The pagefault handler should do
 * that if needed.
 */
static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	pud_t pudval = READ_ONCE(*pud);

	if (!pud_trans_unstable(&pudval))
		return 0;

	if (pud_none(pudval)) {
		walk->action = ACTION_AGAIN;
		return 0;
	}

	/* Huge pud */
	walk->action = ACTION_CONTINUE;
	if (pud_trans_huge(pudval) || pud_devmap(pudval))
		WARN_ON(pud_write(pudval) || pud_dirty(pudval));

	return 0;
}

/*
 * wp_clean_pre_vma - The pagewalk pre_vma callback.
 *
 * The pre_vma callback performs the cache flush, stages the tlb flush
 * and calls the necessary mmu notifiers.
 */
static int wp_clean_pre_vma(unsigned long start, unsigned long end,
			    struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;

	/*
	 * Initialize the flush window to an empty, inverted range; the
	 * pte callbacks shrink-wrap it around the ptes they actually modify.
	 */
	wpwalk->tlbflush_start = end;
	wpwalk->tlbflush_end = start;

	mmu_notifier_range_init(&wpwalk->range, MMU_NOTIFY_PROTECTION_PAGE, 0,
				walk->vma, walk->mm, start, end);
	mmu_notifier_invalidate_range_start(&wpwalk->range);
	flush_cache_range(walk->vma, start, end);

	/*
	 * We're not using tlb_gather_mmu() since typically
	 * only a small subrange of PTEs are affected, whereas
	 * tlb_gather_mmu() records the full range.
	 */
	inc_tlb_flush_pending(walk->mm);

	return 0;
}

/*
 * wp_clean_post_vma - The pagewalk post_vma callback.
 *
 * The post_vma callback performs the tlb flush and calls necessary mmu
 * notifiers.
 */
static void wp_clean_post_vma(struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;

	/*
	 * If another operation is concurrently flushing this mm, the tracked
	 * subrange may be stale; conservatively flush the whole notifier
	 * range instead.
	 */
	if (mm_tlb_flush_nested(walk->mm))
		flush_tlb_range(walk->vma, wpwalk->range.start,
				wpwalk->range.end);
	else if (wpwalk->tlbflush_end > wpwalk->tlbflush_start)
		flush_tlb_range(walk->vma, wpwalk->tlbflush_start,
				wpwalk->tlbflush_end);

	mmu_notifier_invalidate_range_end(&wpwalk->range);
	dec_tlb_flush_pending(walk->mm);
}

/*
 * wp_clean_test_walk - The pagewalk test_walk callback.
 *
 * Won't perform dirty-tracking on COW, read-only or HUGETLB vmas.
 */
static int wp_clean_test_walk(unsigned long start, unsigned long end,
			      struct mm_walk *walk)
{
	unsigned long vm_flags = READ_ONCE(walk->vma->vm_flags);

	/* Skip non-applicable VMAs */
	if ((vm_flags & (VM_SHARED | VM_MAYWRITE | VM_HUGETLB)) !=
	    (VM_SHARED | VM_MAYWRITE))
		return 1;

	return 0;
}

static const struct mm_walk_ops clean_walk_ops = {
	.pte_entry = clean_record_pte,
	.pmd_entry = wp_clean_pmd_entry,
	.pud_entry = wp_clean_pud_entry,
	.test_walk = wp_clean_test_walk,
	.pre_vma = wp_clean_pre_vma,
	.post_vma = wp_clean_post_vma
};

static const struct mm_walk_ops wp_walk_ops = {
	.pte_entry = wp_pte,
	.pmd_entry = wp_clean_pmd_entry,
	.pud_entry = wp_clean_pud_entry,
	.test_walk = wp_clean_test_walk,
	.pre_vma = wp_clean_pre_vma,
	.post_vma = wp_clean_post_vma
};

/**
 * wp_shared_mapping_range - Write-protect all ptes in an address space range
 * @mapping: The address_space we want to write protect
 * @first_index: The first page offset in the range
 * @nr: Number of incremental page offsets to cover
 *
 * Note: This function currently skips transhuge page-table entries, since
 * it's intended for dirty-tracking on the PTE level. It will warn on
 * encountering transhuge write-enabled entries, though, and can easily be
 * extended to handle them as well.
 *
 * Return: The number of ptes actually write-protected. Note that
 * already write-protected ptes are not counted.
 */
unsigned long wp_shared_mapping_range(struct address_space *mapping,
				      pgoff_t first_index, pgoff_t nr)
{
	struct wp_walk wpwalk = { .total = 0 };

	i_mmap_lock_read(mapping);
	WARN_ON(walk_page_mapping(mapping, first_index, nr, &wp_walk_ops,
				  &wpwalk));
	i_mmap_unlock_read(mapping);

	return wpwalk.total;
}
EXPORT_SYMBOL_GPL(wp_shared_mapping_range);
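
/*
 * Illustrative usage sketch (hypothetical caller and variables): a driver
 * that wants to be notified of the next write to a 256-page window of a
 * shared mapping could do
 *
 *	unsigned long nwp;
 *
 *	nwp = wp_shared_mapping_range(mapping, first_pgoff, 256);
 *
 * after which writes fault into page_mkwrite() or pfn_mkwrite(), and nwp
 * tells how many ptes were still writable.
 */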

/**
 * clean_record_shared_mapping_range - Clean and record all ptes in an
 * address space range
 * @mapping: The address_space we want to clean
 * @first_index: The first page offset in the range
 * @nr: Number of incremental page offsets to cover
 * @bitmap_pgoff: The page offset of the first bit in @bitmap
 * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
 * cover the whole range @first_index..@first_index + @nr.
 * @start: Pointer to the number of the first set bit in @bitmap.
 * The value is modified as new bits are set by the function.
 * @end: Pointer to the number of the last set bit in @bitmap + 1. The range
 * is considered empty on entry if *@start >= *@end. The value is modified
 * as new bits are set by the function.
 *
 * Note: When this function returns there is no guarantee that a CPU has
 * not already dirtied new ptes. However it will not clean any ptes not
 * reported in the bitmap. The guarantees are as follows:
 * a) All ptes dirty when the function starts executing will end up recorded
 * in the bitmap.
 * b) All ptes dirtied after that will either remain dirty, be recorded in the
 * bitmap or both.
 *
 * If a caller needs to make sure all dirty ptes are picked up and no
 * additional ones are added, it first needs to write-protect the
 * address-space range and make sure new writers are blocked in
 * page_mkwrite() or pfn_mkwrite(). Then, after a TLB flush following the
 * write-protection, pick up all dirty bits.
 *
 * Note: This function currently skips transhuge page-table entries, since
 * it's intended for dirty-tracking on the PTE level. It will warn on
 * encountering transhuge dirty entries, though, and can easily be extended
 * to handle them as well.
 *
 * Return: The number of dirty ptes actually cleaned.
 */
unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
						pgoff_t first_index, pgoff_t nr,
						pgoff_t bitmap_pgoff,
						unsigned long *bitmap,
						pgoff_t *start,
						pgoff_t *end)
{
	bool none_set = (*start >= *end);
	struct clean_walk cwalk = {
		.base = { .total = 0 },
		.bitmap_pgoff = bitmap_pgoff,
		.bitmap = bitmap,
		.start = none_set ? nr : *start,
		.end = none_set ? 0 : *end,
	};

	i_mmap_lock_read(mapping);
	WARN_ON(walk_page_mapping(mapping, first_index, nr, &clean_walk_ops,
				  &cwalk.base));
	i_mmap_unlock_read(mapping);

	*start = cwalk.start;
	*end = cwalk.end;

	return cwalk.base.total;
}
EXPORT_SYMBOL_GPL(clean_record_shared_mapping_range);
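
/*
 * Illustrative sketch of the protocol described above (hypothetical caller;
 * names and sizes are made up): to snapshot exactly which of 256 pages were
 * written since the last pass, write-protect first, then clean and record:
 *
 *	DECLARE_BITMAP(dirty, 256);
 *	pgoff_t start = 256, end = 0;	// *start >= *end: "none set" on entry
 *	unsigned int i;
 *
 *	bitmap_zero(dirty, 256);
 *	wp_shared_mapping_range(mapping, first_pgoff, 256);
 *	// ... assuming the caller's page_mkwrite()/pfn_mkwrite() now blocks
 *	// new writers, and the call above has flushed the TLB.
 *	clean_record_shared_mapping_range(mapping, first_pgoff, 256,
 *					  first_pgoff, dirty, &start, &end);
 *	for_each_set_bit(i, dirty, 256)
 *		;	// page at first_pgoff + i was dirty
 *
 * Without the write-protect step, a CPU may dirty new ptes while the clean
 * pass runs (see the guarantees above).
 */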