^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2019 Oracle. All Rights Reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Author: Darrick J. Wong <darrick.wong@oracle.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include "xfs.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include "xfs_fs.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include "xfs_shared.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "xfs_format.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "xfs_trans_resv.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "xfs_mount.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "xfs_sb.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "xfs_alloc.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "xfs_ialloc.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "xfs_health.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "scrub/scrub.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "scrub/common.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "scrub/trace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * FS Summary Counters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * ===================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * The basics of filesystem summary counter checking are that we iterate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * AGs counting the number of free blocks, free space btree blocks, per-AG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * reservations, inodes, delayed allocation reservations, and free inodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * Then we compare what we computed against the in-core counters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * However, the reality is that summary counters are a tricky beast to check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * While we /could/ freeze the filesystem and scramble around the AGs counting
 * the free blocks, in practice we prefer not to do that for a scan because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * freezing is costly. To get around this, we added a per-cpu counter of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * delalloc reservations so that we can rotor around the AGs relatively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * quickly, and we allow the counts to be slightly off because we're not taking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * any locks while we do this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * So the first thing we do is warm up the buffer cache in the setup routine by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * walking all the AGs to make sure the incore per-AG structure has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * initialized. The expected value calculation then iterates the incore per-AG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * structures as quickly as it can. We snapshot the percpu counters before and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * after this operation and use the difference in counter values to guess at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * our tolerance for mismatch between expected and actual counter values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * Since the expected value computation is lockless but only browses incore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * values, the percpu counters should be fairly close to each other. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * we'll allow ourselves to be off by at least this (arbitrary) amount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #define XCHK_FSCOUNT_MIN_VARIANCE (512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * Make sure the per-AG structure has been initialized from the on-disk header
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * contents and trust that the incore counters match the ondisk counters. (The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * summary counters after checking all AG headers). Do this from the setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * function so that the inner AG aggregation loop runs as quickly as possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * This function runs during the setup phase /before/ we start checking any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * metadata.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) */
STATIC int
xchk_fscount_warmup(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp = NULL;
	struct xfs_buf		*agf_bp = NULL;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* Walk every AG so that reading the headers populates the perag. */
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		pag = xfs_perag_get(mp, agno);

		/* Both halves already initialized?  Nothing to warm up. */
		if (pag->pagi_init && pag->pagf_init)
			goto next_loop_perag;

		/* Lock both AG headers. */
		error = xfs_ialloc_read_agi(mp, sc->tp, agno, &agi_bp);
		if (error)
			break;
		error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp);
		if (error)
			break;

		/*
		 * These are supposed to be initialized by the header read
		 * function.  If they still aren't, the AG header is bad and
		 * we bail out with the -EFSCORRUPTED staged just below.
		 */
		error = -EFSCORRUPTED;
		if (!pag->pagi_init || !pag->pagf_init)
			break;

		/* Release in reverse order of acquisition. */
		xfs_buf_relse(agf_bp);
		agf_bp = NULL;
		xfs_buf_relse(agi_bp);
		agi_bp = NULL;
next_loop_perag:
		xfs_perag_put(pag);
		pag = NULL;
		/*
		 * The success path falls through the label, so clear the
		 * -EFSCORRUPTED staged above before the next iteration;
		 * xchk_should_terminate() sets error itself if we must stop.
		 */
		error = 0;

		if (xchk_should_terminate(sc, &error))
			break;
	}

	/* Clean up whatever was still held when we broke out of the loop. */
	if (agf_bp)
		xfs_buf_relse(agf_bp);
	if (agi_bp)
		xfs_buf_relse(agi_bp);
	if (pag)
		xfs_perag_put(pag);
	return error;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) xchk_setup_fscounters(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) struct xfs_scrub *sc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) struct xfs_inode *ip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) struct xchk_fscounters *fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (!sc->buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) fsc = sc->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* We must get the incore counters set up before we can proceed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) error = xchk_fscount_warmup(sc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) * Pause background reclaim while we're scrubbing to reduce the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * likelihood of background perturbations to the counters throwing off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * our calculations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) xchk_stop_reaping(sc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) return xchk_trans_alloc(sc, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) * Calculate what the global in-core counters ought to be from the incore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) * per-AG structure. Callers can compare this to the actual in-core counters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) * to estimate by how much both in-core and on-disk counters need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * adjusted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) */
STATIC int
xchk_fscount_aggregate_agcounts(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag;
	uint64_t		delayed;
	xfs_agnumber_t		agno;
	int			tries = 8;	/* retry budget if counters move under us */
	int			error = 0;

retry:
	/* Start each attempt from zero; a retry must not accumulate. */
	fsc->icount = 0;
	fsc->ifree = 0;
	fsc->fdblocks = 0;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		pag = xfs_perag_get(mp, agno);

		/* This somehow got unset since the warmup? */
		if (!pag->pagi_init || !pag->pagf_init) {
			xfs_perag_put(pag);
			return -EFSCORRUPTED;
		}

		/* Count all the inodes */
		fsc->icount += pag->pagi_count;
		fsc->ifree += pag->pagi_freecount;

		/* Add up the free/freelist/bnobt/cntbt blocks */
		fsc->fdblocks += pag->pagf_freeblks;
		fsc->fdblocks += pag->pagf_flcount;
		fsc->fdblocks += pag->pagf_btreeblks;

		/*
		 * Per-AG reservations are taken out of the incore counters,
		 * so they must be left out of the free blocks computation.
		 */
		fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
		fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;

		xfs_perag_put(pag);

		/* Sets error and breaks if a fatal signal is pending. */
		if (xchk_should_terminate(sc, &error))
			break;
	}

	if (error)
		return error;

	/*
	 * The global incore space reservation is taken from the incore
	 * counters, so leave that out of the computation.
	 */
	fsc->fdblocks -= mp->m_resblks_avail;

	/*
	 * Delayed allocation reservations are taken out of the incore counters
	 * but not recorded on disk, so leave them and their indlen blocks out
	 * of the computation.
	 */
	delayed = percpu_counter_sum(&mp->m_delalloc_blks);
	fsc->fdblocks -= delayed;

	trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
			delayed);

	/* Bail out if the values we compute are totally nonsense. */
	if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
	    fsc->fdblocks > mp->m_sb.sb_dblocks ||
	    fsc->ifree > fsc->icount_max)
		return -EFSCORRUPTED;

	/*
	 * If ifree > icount then we probably had some perturbation in the
	 * counters while we were calculating things.  We'll try a few times
	 * to maintain ifree <= icount before giving up.
	 */
	if (fsc->ifree > fsc->icount) {
		if (tries--)
			goto retry;
		/* Still inconsistent after all retries: fs is too busy. */
		xchk_set_incomplete(sc);
		return 0;
	}

	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * Is the @counter reasonably close to the @expected value?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * We neither locked nor froze anything in the filesystem while aggregating the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * per-AG data to compute the @expected value, which means that the counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * could have changed. We know the @old_value of the summation of the counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * before the aggregation, and we re-sum the counter now. If the expected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * value falls between the two summations, we're ok.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * Otherwise, we /might/ have a problem. If the change in the summations is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * more than we want to tolerate, the filesystem is probably busy and we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * just send back INCOMPLETE and see if userspace will try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) xchk_fscount_within_range(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) struct xfs_scrub *sc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) const int64_t old_value,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) struct percpu_counter *counter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) uint64_t expected)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) int64_t min_value, max_value;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) int64_t curr_value = percpu_counter_sum(counter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) old_value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) /* Negative values are always wrong. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) if (curr_value < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) /* Exact matches are always ok. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) if (curr_value == expected)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) min_value = min(old_value, curr_value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) max_value = max(old_value, curr_value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) /* Within the before-and-after range is ok. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) if (expected >= min_value && expected <= max_value)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) * If the difference between the two summations is too large, the fs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * might just be busy and so we'll mark the scrub incomplete. Return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) * true here so that we don't mark the counter corrupt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) * XXX: In the future when userspace can grant scrub permission to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) * quiesce the filesystem to solve the outsized variance problem, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) * check should be moved up and the return code changed to signal to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) * userspace that we need quiesce permission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) xchk_set_incomplete(sc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
/* Check the superblock counters. */
int
xchk_fscounters(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xchk_fscounters	*fsc = sc->buf;
	int64_t			icount, ifree, fdblocks;
	int			error;

	/*
	 * Snapshot the percpu counters *before* the aggregation so the
	 * within-range checks below can bracket any concurrent drift.
	 */
	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	/* No negative values, please! */
	if (icount < 0 || ifree < 0 || fdblocks < 0)
		xchk_set_corrupt(sc);

	/* See if icount is obviously wrong. */
	if (icount < fsc->icount_min || icount > fsc->icount_max)
		xchk_set_corrupt(sc);

	/* See if fdblocks is obviously wrong. */
	if (fdblocks > mp->m_sb.sb_dblocks)
		xchk_set_corrupt(sc);

	/*
	 * If ifree exceeds icount by more than the minimum variance then
	 * something's probably wrong with the counters.
	 */
	if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
		xchk_set_corrupt(sc);

	/* Walk the incore AG headers to calculate the expected counters. */
	error = xchk_fscount_aggregate_agcounts(sc, fsc);
	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
		return error;
	/* Aggregation gave up (fs too busy); don't judge the counters. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
		return 0;

	/* Compare the in-core counters with whatever we counted. */
	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
		xchk_set_corrupt(sc);

	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
		xchk_set_corrupt(sc);

	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
			fsc->fdblocks))
		xchk_set_corrupt(sc);

	return 0;
}