// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

/*
 * We want to know the real level where an entry is located, ignoring any
 * folding of levels which may be happening. For example, if the p4d level is
 * folded then a missing entry found at level 1 (p4d) is actually at level 0
 * (pgd).
 */
static int real_depth(int depth)
{
	if (depth == 3 && PTRS_PER_PMD == 1)
		depth = 2;
	if (depth == 2 && PTRS_PER_PUD == 1)
		depth = 1;
	if (depth == 1 && PTRS_PER_P4D == 1)
		depth = 0;
	return depth;
}

static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	for (;;) {
		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		if (addr >= end - PAGE_SIZE)
			break;
		addr += PAGE_SIZE;
		pte++;
	}
	return err;
}

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;
	spinlock_t *ptl;

	if (walk->no_vma) {
		pte = pte_offset_map(pmd, addr);
		err = walk_pte_range_inner(pte, addr, end, walk);
		pte_unmap(pte);
	} else {
		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
		err = walk_pte_range_inner(pte, addr, end, walk);
		pte_unmap_unlock(pte, ptl);
	}

	return err;
}

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(3);

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
		 */
		if (ops->pmd_entry)
			err = ops->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pte_entry))
			continue;

		if (walk->vma) {
			split_huge_pmd(walk->vma, pmd, addr);
			if (pmd_trans_unstable(pmd))
				goto again;
		}

		err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}

static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(2);

	pud = pud_offset(p4d, addr);
	do {
again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		if (ops->pud_entry)
			err = ops->pud_entry(pud, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pmd_entry || ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(1);

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	if (walk->pgd)
		pgd = walk->pgd + pgd_index(addr);
	else
		pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, 0, walk);
			if (err)
				break;
			continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
		    ops->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask, sz);

		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, -1, walk);

		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it, as indicated by the return value. Return 0 if we do walk
 * over the current vma, and return 1 if we skip the vma. Negative values
 * mean an error, in which case we abort the current walk.
 *
 * An illustrative ->test_walk() sketch follows this function.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

	/*
	 * A vma with VM_PFNMAP set doesn't have any valid struct pages behind
	 * its range, so we don't walk over it as we do for normal vmas.
	 * However, some callers are interested in handling hole ranges and
	 * don't want any address range to simply be skipped. Such users will
	 * have defined a ->pte_hole() callback, so let's delegate handling of
	 * VM_PFNMAP vmas to it.
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;
		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		return err ? err : 1;
	}
	return 0;
}
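
/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * ->test_walk() callback following the protocol documented above.  The
 * function name and the chosen vm_flags are made up for illustration;
 * return 1 to skip a vma, 0 to walk it, and a negative errno to abort the
 * whole walk.
 */
static int example_test_walk(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	/* Skip vmas without struct pages behind them, walk everything else. */
	if (vma->vm_flags & (VM_PFNMAP | VM_IO))
		return 1;
	return 0;
}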

static int __walk_page_range(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (vma && ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (vma && is_vm_hugetlb_page(vma)) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (vma && ops->post_vma)
		ops->post_vma(walk);

	return err;
}

/**
 * walk_page_range - walk page table with caller-specific callbacks
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During the walk, we can do
 * some caller-specific work for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined as below:
 *
 *  - 0  : succeeded in handling the current entry; if the end address has
 *         not been reached yet, continue the walk.
 *  - >0 : succeeded in handling the current entry, and return to the caller
 *         with this caller-specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with this error code.
 *
 * Before starting to walk the page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * If operations need to be staged before and committed after a vma is walked,
 * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
 * since it is intended to handle commit-type operations, can't return any
 * errors.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for access from the callbacks. If you want to pass some
 * caller-specific data to the callbacks, @private should be helpful.
 *
 * A hypothetical usage sketch follows the function body below.
 *
 * Locking:
 * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
 * because these functions traverse the vma list and/or access the vma's data.
 */
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.private	= private,
	};

	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk.vma = NULL;
			next = end;
		} else if (start < vma->vm_start) { /* outside vma */
			walk.vma = NULL;
			next = min(end, vma->vm_start);
		} else { /* inside vma */
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
		}
		if (walk.vma || walk.ops->pte_hole)
			err = __walk_page_range(start, next, &walk);
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}
EXPORT_SYMBOL_GPL(walk_page_range);
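
/*
 * Illustrative sketch only (not part of the original file): count the present
 * ptes in a range of a process' address space.  count_pte_entry(),
 * count_present_ops and count_present_ptes() are hypothetical names; the
 * pagewalk and mmap_lock calls are the real API.  As documented above, the
 * caller takes mmap_lock before walking.
 */
static int count_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *nr_present = walk->private;

	/* @private carries the caller's counter through the walk. */
	if (pte_present(*pte))
		(*nr_present)++;
	return 0;
}

static const struct mm_walk_ops count_present_ops = {
	.pte_entry	= count_pte_entry,
};

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long nr_present = 0;

	mmap_read_lock(mm);
	walk_page_range(mm, start, end, &count_present_ops, &nr_present);
	mmap_read_unlock(mm);

	return nr_present;
}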

/*
 * Similar to walk_page_range() but can walk any page tables even if they are
 * not backed by VMAs. Because 'unusual' entries may be walked, this function
 * will also not lock the PTEs for the pte_entry() callback. This is useful
 * for walking the kernel page tables or page tables for firmware. A
 * hypothetical usage sketch follows the function below.
 */
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.pgd		= pgd,
		.private	= private,
		.no_vma		= true
	};

	if (start >= end || !walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	return __walk_page_range(start, end, &walk);
}
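
/*
 * Illustrative sketch only (not part of the original file): walk a slice of
 * the kernel page tables with walk_page_range_novma(), reusing the
 * hypothetical count_present_ops from the sketch above.  init_mm's mmap_lock
 * is taken because the walker asserts it even though no vmas are involved;
 * the address range to cover is up to the caller.
 */
static unsigned long count_kernel_ptes(unsigned long start, unsigned long end)
{
	unsigned long nr_present = 0;

	mmap_read_lock(&init_mm);
	walk_page_range_novma(&init_mm, start, end, &count_present_ops,
			      NULL, &nr_present);
	mmap_read_unlock(&init_mm);

	return nr_present;
}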

int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};
	int err;

	if (!walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
	if (err > 0)
		return 0;
	if (err < 0)
		return err;
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}

/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space.
 * @mapping:	Pointer to the struct address_space
 * @first_index: First page offset in the address_space
 * @nr:		Number of incremental page offsets to cover
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * This function walks all memory areas mapped into a struct address_space.
 * The walk is limited to only the given page-size index range, but if
 * the index boundaries cross a huge page-table entry, that entry will be
 * included.
 *
 * Also see walk_page_range() for additional information, and the
 * hypothetical usage sketch after this function.
 *
 * Locking:
 * This function can't require that the struct mm_struct::mmap_lock is held,
 * since @mapping may be mapped by multiple processes. Instead
 * @mapping->i_mmap_rwsem must be held. This might have implications in the
 * callbacks, and it's up to the caller to ensure that the
 * struct mm_struct::mmap_lock is not needed.
 *
 * Also, this means that a caller can't rely on the struct
 * vm_area_struct::vm_flags to be constant across a call,
 * except for immutable flags. Callers requiring this shouldn't use
 * this function.
 *
 * Return: 0 on success, negative error code on failure, positive number on
 * caller defined premature termination.
 */
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.private	= private,
	};
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
		/* Clip to the vma */
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}
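
/*
 * Illustrative sketch only (not part of the original file): walk every
 * mapping of a pagecache range across all processes, reusing the
 * hypothetical count_present_ops from the walk_page_range() sketch above.
 * As documented above, i_mmap_rwsem (not mmap_lock) is the lock that must
 * be held around the walk.
 */
static int count_mapping_ptes(struct address_space *mapping,
			      pgoff_t first_index, pgoff_t nr,
			      unsigned long *nr_present)
{
	int err;

	*nr_present = 0;

	i_mmap_lock_read(mapping);
	err = walk_page_mapping(mapping, first_index, nr,
				&count_present_ops, nr_present);
	i_mmap_unlock_read(mapping);

	return err;
}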