^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (c) 2000-2006 Silicon Graphics, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * All Rights Reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include "xfs.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "xfs_shared.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "xfs_format.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "xfs_log_format.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "xfs_trans_resv.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "xfs_sb.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "xfs_mount.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "xfs_trace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "xfs_log.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "xfs_log_recover.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "xfs_trans.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include "xfs_buf_item.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include "xfs_errortag.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include "xfs_error.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) static kmem_zone_t *xfs_buf_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define xb_to_gfp(flags) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * Locking orders
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * xfs_buf_ioacct_inc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * xfs_buf_ioacct_dec:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * b_sema (caller holds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * b_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * xfs_buf_stale:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * b_sema (caller holds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * b_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * lru_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * xfs_buf_rele:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * b_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * pag_buf_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * lru_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * xfs_buftarg_wait_rele
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * lru_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * b_lock (trylock due to inversion)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * xfs_buftarg_isolate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * lru_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * b_lock (trylock due to inversion)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) xfs_buf_submit(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) xfs_buf_is_vmapped(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * Return true if the buffer is vmapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * b_addr is null if the buffer is not mapped, but the code is clever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * enough to know it doesn't have to map a single page, so the check has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * to be both for b_addr and bp->b_page_count > 1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) return bp->b_addr && bp->b_page_count > 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) xfs_buf_vmap_len(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) * Bump the I/O in flight count on the buftarg if we haven't yet done so for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) * this buffer. The count is incremented once per buffer (per hold cycle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * because the corresponding decrement is deferred to buffer release. Buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * tracking adds unnecessary overhead. This is used for sychronization purposes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * with unmount (see xfs_wait_buftarg()), so all we really need is a count of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) * in-flight buffers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) * Buffers that are never released (e.g., superblock, iclog buffers) must set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) * never reaches zero and unmount hangs indefinitely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) xfs_buf_ioacct_inc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) if (bp->b_flags & XBF_NO_IOACCT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) ASSERT(bp->b_flags & XBF_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) spin_lock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) bp->b_state |= XFS_BSTATE_IN_FLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) percpu_counter_inc(&bp->b_target->bt_io_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * Clear the in-flight state on a buffer about to be released to the LRU or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * freed and unaccount from the buftarg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) __xfs_buf_ioacct_dec(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) lockdep_assert_held(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) percpu_counter_dec(&bp->b_target->bt_io_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) xfs_buf_ioacct_dec(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) spin_lock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) __xfs_buf_ioacct_dec(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * When we mark a buffer stale, we remove the buffer from the LRU and clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * b_lru_ref count so that the buffer is freed immediately when the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * reference count falls to zero. If the buffer is already on the LRU, we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * to remove the reference that LRU holds on the buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * This prevents build-up of stale buffers on the LRU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) xfs_buf_stale(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) ASSERT(xfs_buf_islocked(bp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) bp->b_flags |= XBF_STALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * Clear the delwri status so that a delwri queue walker will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * flush this buffer to disk now that it is stale. The delwri queue has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) * a reference to the buffer, so this is safe to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) bp->b_flags &= ~_XBF_DELWRI_Q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * Once the buffer is marked stale and unlocked, a subsequent lookup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * could reset b_flags. There is no guarantee that the buffer is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * unaccounted (released to LRU) before that occurs. Drop in-flight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * status now to preserve accounting consistency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) spin_lock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) __xfs_buf_ioacct_dec(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) atomic_set(&bp->b_lru_ref, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) atomic_dec(&bp->b_hold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) ASSERT(atomic_read(&bp->b_hold) >= 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) xfs_buf_get_maps(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) int map_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) ASSERT(bp->b_maps == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) bp->b_map_count = map_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) if (map_count == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) bp->b_maps = &bp->__b_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) KM_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) if (!bp->b_maps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) * Frees b_pages if it was allocated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) xfs_buf_free_maps(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) if (bp->b_maps != &bp->__b_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) kmem_free(bp->b_maps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) bp->b_maps = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) _xfs_buf_alloc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) struct xfs_buf_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) int nmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) xfs_buf_flags_t flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) struct xfs_buf **bpp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) *bpp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * We don't want certain flags to appear in b_flags unless they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * specifically set by later operations on the buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) atomic_set(&bp->b_hold, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) atomic_set(&bp->b_lru_ref, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) init_completion(&bp->b_iowait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) INIT_LIST_HEAD(&bp->b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) INIT_LIST_HEAD(&bp->b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) INIT_LIST_HEAD(&bp->b_li_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) sema_init(&bp->b_sema, 0); /* held, no waiters */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) spin_lock_init(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) bp->b_target = target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) bp->b_mount = target->bt_mount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) bp->b_flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * Set length and io_length to the same value initially.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * I/O routines should use io_length, which will be the same in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * most cases but may be reset (e.g. XFS recovery).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) error = xfs_buf_get_maps(bp, nmaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) kmem_cache_free(xfs_buf_zone, bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) bp->b_bn = map[0].bm_bn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) bp->b_length = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) for (i = 0; i < nmaps; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) bp->b_maps[i].bm_bn = map[i].bm_bn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) bp->b_maps[i].bm_len = map[i].bm_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) bp->b_length += map[i].bm_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) atomic_set(&bp->b_pin_count, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) init_waitqueue_head(&bp->b_waiters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) XFS_STATS_INC(bp->b_mount, xb_create);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) trace_xfs_buf_init(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) *bpp = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) * Allocate a page array capable of holding a specified number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) * of pages, and point the page buf at it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) _xfs_buf_get_pages(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) xfs_buf_t *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) int page_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) /* Make sure that we have a page list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) if (bp->b_pages == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) bp->b_page_count = page_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) if (page_count <= XB_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) bp->b_pages = bp->b_page_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) bp->b_pages = kmem_alloc(sizeof(struct page *) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) page_count, KM_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) if (bp->b_pages == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * Frees b_pages if it was allocated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) _xfs_buf_free_pages(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) xfs_buf_t *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) if (bp->b_pages != bp->b_page_array) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) kmem_free(bp->b_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) bp->b_pages = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) * Releases the specified buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) * The modification state of any associated pages is left unchanged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * The buffer must not be on any hash - use xfs_buf_rele instead for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) * hashed and refcounted buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) xfs_buf_free(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) xfs_buf_t *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) trace_xfs_buf_free(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) ASSERT(list_empty(&bp->b_lru));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) if (bp->b_flags & _XBF_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) uint i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) if (xfs_buf_is_vmapped(bp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) vm_unmap_ram(bp->b_addr - bp->b_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) bp->b_page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) for (i = 0; i < bp->b_page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) struct page *page = bp->b_pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) __free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) if (current->reclaim_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) current->reclaim_state->reclaimed_slab +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) bp->b_page_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) } else if (bp->b_flags & _XBF_KMEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) kmem_free(bp->b_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) _xfs_buf_free_pages(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) xfs_buf_free_maps(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) kmem_cache_free(xfs_buf_zone, bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) * Allocates all the pages for buffer in question and builds it's page list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) xfs_buf_allocate_memory(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) xfs_buf_t *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) uint flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) size_t nbytes, offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) gfp_t gfp_mask = xb_to_gfp(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) unsigned short page_count, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) xfs_off_t start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) xfs_km_flags_t kmflag_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) * assure zeroed buffer for non-read cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) if (!(flags & XBF_READ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) kmflag_mask |= KM_ZERO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) gfp_mask |= __GFP_ZERO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) * for buffers that are contained within a single page, just allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) * the memory from the heap - there's no need for the complexity of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * page arrays to keep allocation down to order 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) size = BBTOB(bp->b_length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) if (size < PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) bp->b_addr = kmem_alloc_io(size, align_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) KM_NOFS | kmflag_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) if (!bp->b_addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) /* low memory - use alloc_page loop instead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) goto use_alloc_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) ((unsigned long)bp->b_addr & PAGE_MASK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) /* b_addr spans two pages - use alloc_page instead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) kmem_free(bp->b_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) bp->b_addr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) goto use_alloc_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) bp->b_offset = offset_in_page(bp->b_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) bp->b_pages = bp->b_page_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) bp->b_pages[0] = kmem_to_page(bp->b_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) bp->b_page_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) bp->b_flags |= _XBF_KMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) use_alloc_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) page_count = end - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) error = _xfs_buf_get_pages(bp, page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) if (unlikely(error))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) offset = bp->b_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) bp->b_flags |= _XBF_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) for (i = 0; i < bp->b_page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) uint retries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) page = alloc_page(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) if (unlikely(page == NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) if (flags & XBF_READ_AHEAD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) bp->b_page_count = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) goto out_free_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) * This could deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) * But until all the XFS lowlevel code is revamped to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) * handle buffer allocation failures we can't do much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) if (!(++retries % 100))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) xfs_err(NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) current->comm, current->pid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) __func__, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) XFS_STATS_INC(bp->b_mount, xb_page_retries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) congestion_wait(BLK_RW_ASYNC, HZ/50);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) XFS_STATS_INC(bp->b_mount, xb_page_found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) nbytes = min_t(size_t, size, PAGE_SIZE - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) size -= nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) bp->b_pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) out_free_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) for (i = 0; i < bp->b_page_count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) __free_page(bp->b_pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) bp->b_flags &= ~_XBF_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) * Map buffer into kernel address-space if necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) _xfs_buf_map_pages(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) xfs_buf_t *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) uint flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) ASSERT(bp->b_flags & _XBF_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) if (bp->b_page_count == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) /* A single page buffer is always mappable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) } else if (flags & XBF_UNMAPPED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) bp->b_addr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) int retried = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) unsigned nofs_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) * vm_map_ram() will allocate auxiliary structures (e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) * pagetables) with GFP_KERNEL, yet we are likely to be under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) * GFP_NOFS context here. Hence we need to tell memory reclaim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * that we are in such a context via PF_MEMALLOC_NOFS to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * memory reclaim re-entering the filesystem here and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) * potentially deadlocking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) nofs_flag = memalloc_nofs_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) if (bp->b_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) vm_unmap_aliases();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) } while (retried++ <= 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) memalloc_nofs_restore(nofs_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) if (!bp->b_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) bp->b_addr += bp->b_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) * Finding and Reading Buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) _xfs_buf_obj_cmp(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) struct rhashtable_compare_arg *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) const void *obj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) const struct xfs_buf_map *map = arg->key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) const struct xfs_buf *bp = obj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * The key hashing in the lookup path depends on the key being the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * first element of the compare_arg, make sure to assert this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) if (bp->b_bn != map->bm_bn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) if (unlikely(bp->b_length != map->bm_len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * found a block number match. If the range doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) * match, the only way this is allowed is if the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * in the cache is stale and the transaction that made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * it stale has not yet committed. i.e. we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) * reallocating a busy extent. Skip this buffer and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * continue searching for an exact match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) ASSERT(bp->b_flags & XBF_STALE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) static const struct rhashtable_params xfs_buf_hash_params = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) .min_size = 32, /* empty AGs have minimal footprint */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) .nelem_hint = 16,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) .key_len = sizeof(xfs_daddr_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) .key_offset = offsetof(struct xfs_buf, b_bn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) .head_offset = offsetof(struct xfs_buf, b_rhash_head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) .automatic_shrinking = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) .obj_cmpfn = _xfs_buf_obj_cmp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) xfs_buf_hash_init(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) struct xfs_perag *pag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) spin_lock_init(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) xfs_buf_hash_destroy(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) struct xfs_perag *pag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) rhashtable_destroy(&pag->pag_buf_hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) * Look up a buffer in the buffer cache and return it referenced and locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) * in @found_bp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) * cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) * -EAGAIN if we fail to lock it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) * Return values are:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) * -EFSCORRUPTED if have been supplied with an invalid address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * -EAGAIN on trylock failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) * -ENOENT if we fail to find a match and @new_bp was NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) * 0, with @found_bp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * - @new_bp if we inserted it into the cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * - the buffer we found and locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) xfs_buf_find(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) struct xfs_buftarg *btp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) struct xfs_buf_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) int nmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) xfs_buf_flags_t flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) struct xfs_buf *new_bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) struct xfs_buf **found_bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) struct xfs_perag *pag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) xfs_buf_t *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) xfs_daddr_t eofs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) *found_bp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) for (i = 0; i < nmaps; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) cmap.bm_len += map[i].bm_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) /* Check for IOs smaller than the sector size / not sector aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) * Corrupted block numbers can get through to here, unfortunately, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) * have to check that the buffer falls within the filesystem bounds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) xfs_alert(btp->bt_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) "%s: daddr 0x%llx out of range, EOFS 0x%llx",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) __func__, cmap.bm_bn, eofs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) return -EFSCORRUPTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) pag = xfs_perag_get(btp->bt_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) spin_lock(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) xfs_buf_hash_params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) if (bp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) atomic_inc(&bp->b_hold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) /* No match found */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) if (!new_bp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) spin_unlock(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) xfs_perag_put(pag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) /* the buffer keeps the perag reference until it is freed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) new_bp->b_pag = pag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) xfs_buf_hash_params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) spin_unlock(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) *found_bp = new_bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) spin_unlock(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) xfs_perag_put(pag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) if (!xfs_buf_trylock(bp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) if (flags & XBF_TRYLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) xfs_buf_rele(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) xfs_buf_lock(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) * if the buffer is stale, clear all the external state associated with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) * it. We need to keep flags such as how we allocated the buffer memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) * intact here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (bp->b_flags & XBF_STALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) bp->b_ops = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) trace_xfs_buf_find(bp, flags, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) XFS_STATS_INC(btp->bt_mount, xb_get_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) *found_bp = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) struct xfs_buf *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) xfs_buf_incore(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) xfs_daddr_t blkno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) size_t numblks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) xfs_buf_flags_t flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) return bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * Assembles a buffer covering the specified range. The code is optimised for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) * cache hits, as metadata intensive workloads will see 3 orders of magnitude
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) * more hits than misses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) xfs_buf_get_map(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) struct xfs_buf_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) int nmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) xfs_buf_flags_t flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) struct xfs_buf **bpp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) struct xfs_buf *new_bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) *bpp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) if (!error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) if (error != -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) error = xfs_buf_allocate_memory(new_bp, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) xfs_buf_free(new_bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) xfs_buf_free(new_bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) if (bp != new_bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) xfs_buf_free(new_bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) if (!bp->b_addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) error = _xfs_buf_map_pages(bp, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) if (unlikely(error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) xfs_warn_ratelimited(target->bt_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) "%s: failed to map %u pages", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) bp->b_page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) * Clear b_error if this is a lookup from a caller that doesn't expect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) * valid data to be found in the buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) if (!(flags & XBF_READ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) xfs_buf_ioerror(bp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) XFS_STATS_INC(target->bt_mount, xb_get);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) trace_xfs_buf_get(bp, flags, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) *bpp = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) _xfs_buf_read(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) xfs_buf_t *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) xfs_buf_flags_t flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) ASSERT(!(flags & XBF_WRITE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) return xfs_buf_submit(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * Reverify a buffer found in cache without an attached ->b_ops.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * If the caller passed an ops structure and the buffer doesn't have ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * assigned, set the ops and use it to verify the contents. If verification
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * already in XBF_DONE state on entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) * Under normal operations, every in-core buffer is verified on read I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * completion. There are two scenarios that can lead to in-core buffers without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) * an assigned ->b_ops. The first is during log recovery of buffers on a V4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) * filesystem, though these buffers are purged at the end of recovery. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) * other is online repair, which intentionally reads with a NULL buffer ops to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) * run several verifiers across an in-core buffer in order to establish buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * type. If repair can't establish that, the buffer will be left in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) * with NULL buffer ops.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) xfs_buf_reverify(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) const struct xfs_buf_ops *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) ASSERT(bp->b_flags & XBF_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) ASSERT(bp->b_error == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) if (!ops || bp->b_ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) bp->b_ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) bp->b_ops->verify_read(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) if (bp->b_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) bp->b_flags &= ~XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) return bp->b_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) xfs_buf_read_map(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) struct xfs_buf_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) int nmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) xfs_buf_flags_t flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) struct xfs_buf **bpp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) const struct xfs_buf_ops *ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) xfs_failaddr_t fa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) flags |= XBF_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) *bpp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) trace_xfs_buf_read(bp, flags, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) if (!(bp->b_flags & XBF_DONE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) /* Initiate the buffer read and wait. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) XFS_STATS_INC(target->bt_mount, xb_get_read);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) bp->b_ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) error = _xfs_buf_read(bp, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) /* Readahead iodone already dropped the buffer, so exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) if (flags & XBF_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) /* Buffer already read; all we need to do is check it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) error = xfs_buf_reverify(bp, ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) /* Readahead already finished; drop the buffer and exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (flags & XBF_ASYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) /* We do not want read in the flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) bp->b_flags &= ~XBF_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) ASSERT(bp->b_ops != NULL || ops == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * If we've had a read error, then the contents of the buffer are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) * invalid and should not be used. To ensure that a followup read tries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) * to pull the buffer from disk again, we clear the XBF_DONE flag and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) * mark the buffer stale. This ensures that anyone who has a current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * reference to the buffer will interpret it's contents correctly and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * future cache lookups will also treat it as an empty, uninitialised
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) xfs_buf_ioerror_alert(bp, fa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) bp->b_flags &= ~XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) xfs_buf_stale(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) /* bad CRC means corrupted metadata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if (error == -EFSBADCRC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) error = -EFSCORRUPTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) *bpp = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * If we are not low on memory then do the readahead in a deadlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * safe manner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) xfs_buf_readahead_map(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) struct xfs_buf_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) int nmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) const struct xfs_buf_ops *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (bdi_read_congested(target->bt_bdev->bd_bdi))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) xfs_buf_read_map(target, map, nmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) __this_address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * Read an uncached buffer from disk. Allocates and returns a locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * buffer containing the disk contents or nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) xfs_buf_read_uncached(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) xfs_daddr_t daddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) size_t numblks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) struct xfs_buf **bpp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) const struct xfs_buf_ops *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) *bpp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) error = xfs_buf_get_uncached(target, numblks, flags, &bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) /* set up the buffer for a read IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) ASSERT(bp->b_map_count == 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) bp->b_maps[0].bm_bn = daddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) bp->b_flags |= XBF_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) bp->b_ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) xfs_buf_submit(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (bp->b_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) error = bp->b_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) *bpp = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) xfs_buf_get_uncached(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) struct xfs_buftarg *target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) size_t numblks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) struct xfs_buf **bpp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) unsigned long page_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) int error, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) *bpp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) /* flags might contain irrelevant bits, pass only what we care about */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) error = _xfs_buf_get_pages(bp, page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) goto fail_free_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) for (i = 0; i < page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) if (!bp->b_pages[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) goto fail_free_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) bp->b_flags |= _XBF_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) error = _xfs_buf_map_pages(bp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) if (unlikely(error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) xfs_warn(target->bt_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) "%s: failed to map pages", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) goto fail_free_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) trace_xfs_buf_get_uncached(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) *bpp = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) fail_free_mem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) while (--i >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) __free_page(bp->b_pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) _xfs_buf_free_pages(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) fail_free_buf:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) xfs_buf_free_maps(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) kmem_cache_free(xfs_buf_zone, bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * Increment reference count on buffer, to hold the buffer concurrently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * with another thread which may release (free) the buffer asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) * Must hold the buffer already to call this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) xfs_buf_hold(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) xfs_buf_t *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) trace_xfs_buf_hold(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) atomic_inc(&bp->b_hold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * Release a hold on the specified buffer. If the hold count is 1, the buffer is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * placed on LRU or freed (depending on b_lru_ref).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) xfs_buf_rele(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) xfs_buf_t *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) struct xfs_perag *pag = bp->b_pag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) bool release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) bool freebuf = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) trace_xfs_buf_rele(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) if (!pag) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) ASSERT(list_empty(&bp->b_lru));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) if (atomic_dec_and_test(&bp->b_hold)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) xfs_buf_ioacct_dec(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) xfs_buf_free(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) ASSERT(atomic_read(&bp->b_hold) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) * We grab the b_lock here first to serialise racing xfs_buf_rele()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * calls. The pag_buf_lock being taken on the last reference only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * serialises against racing lookups in xfs_buf_find(). IOWs, the second
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * to last reference we drop here is not serialised against the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) * reference until we take bp->b_lock. Hence if we don't grab b_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * first, the last "release" reference can win the race to the lock and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * free the buffer before the second-to-last reference is processed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * leading to a use-after-free scenario.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) spin_lock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (!release) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * Drop the in-flight state if the buffer is already on the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) * and it holds the only reference. This is racy because we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) * ensures the decrement occurs only once per-buf.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) __xfs_buf_ioacct_dec(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) /* the last reference has been dropped ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) __xfs_buf_ioacct_dec(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) * If the buffer is added to the LRU take a new reference to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) * buffer for the LRU and clear the (now stale) dispose list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * state flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) bp->b_state &= ~XFS_BSTATE_DISPOSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) atomic_inc(&bp->b_hold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) spin_unlock(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) * most of the time buffers will already be removed from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * LRU, so optimise that case by checking for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * was on was the disposal list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) ASSERT(list_empty(&bp->b_lru));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) xfs_buf_hash_params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) spin_unlock(&pag->pag_buf_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) xfs_perag_put(pag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) freebuf = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) if (freebuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) xfs_buf_free(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * Lock a buffer object, if it is not already locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * If we come across a stale, pinned, locked buffer, we know that we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * being asked to lock a buffer that has been reallocated. Because it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * pinned, we know that the log has not been pushed to disk and hence it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * will still be locked. Rather than continuing to have trylock attempts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * fail until someone else pushes the log, push it ourselves before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) * returning. This means that the xfsaild will not get stuck trying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) * to push on stale inode buffers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) xfs_buf_trylock(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) int locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) locked = down_trylock(&bp->b_sema) == 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) trace_xfs_buf_trylock(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) trace_xfs_buf_trylock_fail(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) return locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * Lock a buffer object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * If we come across a stale, pinned, locked buffer, we know that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * are being asked to lock a buffer that has been reallocated. Because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * it is pinned, we know that the log has not been pushed to disk and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * hence it will still be locked. Rather than sleeping until someone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * else pushes the log, push it ourselves before trying to get the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) xfs_buf_lock(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) trace_xfs_buf_lock(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) xfs_log_force(bp->b_mount, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) down(&bp->b_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) trace_xfs_buf_lock_done(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) xfs_buf_unlock(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) ASSERT(xfs_buf_islocked(bp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) up(&bp->b_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) trace_xfs_buf_unlock(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) xfs_buf_wait_unpin(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) xfs_buf_t *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) DECLARE_WAITQUEUE (wait, current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (atomic_read(&bp->b_pin_count) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) add_wait_queue(&bp->b_waiters, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) set_current_state(TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (atomic_read(&bp->b_pin_count) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) io_schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) remove_wait_queue(&bp->b_waiters, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) xfs_buf_ioerror_alert_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) static unsigned long lasttime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) static struct xfs_buftarg *lasttarg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) if (bp->b_target != lasttarg ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) time_after(jiffies, (lasttime + 5*HZ))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) lasttime = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) xfs_buf_ioerror_alert(bp, __this_address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) lasttarg = bp->b_target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) * Account for this latest trip around the retry handler, and decide if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) * we've failed enough times to constitute a permanent failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) xfs_buf_ioerror_permanent(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) struct xfs_error_cfg *cfg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) struct xfs_mount *mp = bp->b_mount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) ++bp->b_retries > cfg->max_retries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) /* At unmount we may treat errors differently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) * On a sync write or shutdown we just want to stale the buffer and let the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * caller handle the error in bp->b_error appropriately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) * If the write was asynchronous then no one will be looking for the error. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * this is the first failure of this type, clear the error state and write the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * buffer out again. This means we always retry an async write failure at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * once, but we also need to set the buffer up to behave correctly now for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * repeated failures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) * If we get repeated async write failures, then we take action according to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) * error configuration we have been set up to use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) * Returns true if this function took care of error handling and the caller must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) * not touch the buffer again. Return false if the caller should proceed with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) * normal I/O completion handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) xfs_buf_ioend_handle_error(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) struct xfs_mount *mp = bp->b_mount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) struct xfs_error_cfg *cfg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) * If we've already decided to shutdown the filesystem because of I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) * errors, there's no point in giving this a retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (XFS_FORCED_SHUTDOWN(mp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) goto out_stale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) xfs_buf_ioerror_alert_ratelimited(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) * We're not going to bother about retrying this during recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) * One strike!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (bp->b_flags & _XBF_LOGRECOVERY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * Synchronous writes will have callers process the error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (!(bp->b_flags & XBF_ASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) goto out_stale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) trace_xfs_buf_iodone_async(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (bp->b_last_error != bp->b_error ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) bp->b_last_error = bp->b_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) !bp->b_first_retry_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) bp->b_first_retry_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) goto resubmit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) * Permanent error - we need to trigger a shutdown if we haven't already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) * to indicate that inconsistency will result from this action.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) if (xfs_buf_ioerror_permanent(bp, cfg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) goto out_stale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /* Still considered a transient error. Caller will schedule retries. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) if (bp->b_flags & _XBF_INODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) xfs_buf_inode_io_fail(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) else if (bp->b_flags & _XBF_DQUOTS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) xfs_buf_dquot_io_fail(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) ASSERT(list_empty(&bp->b_li_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) xfs_buf_ioerror(bp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) resubmit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) xfs_buf_ioerror(bp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) xfs_buf_submit(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) out_stale:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) xfs_buf_stale(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) bp->b_flags |= XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) bp->b_flags &= ~XBF_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) trace_xfs_buf_error_relse(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) xfs_buf_ioend(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) trace_xfs_buf_iodone(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) * Pull in IO completion errors now. We are guaranteed to be running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) * single threaded, so we don't need the lock to read b_io_error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) if (!bp->b_error && bp->b_io_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) xfs_buf_ioerror(bp, bp->b_io_error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) if (bp->b_flags & XBF_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) if (!bp->b_error && bp->b_ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) bp->b_ops->verify_read(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) if (!bp->b_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) bp->b_flags |= XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) if (!bp->b_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) bp->b_flags &= ~XBF_WRITE_FAIL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) bp->b_flags |= XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) /* clear the retry state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) bp->b_last_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) bp->b_retries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) bp->b_first_retry_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) * Note that for things like remote attribute buffers, there may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) * not be a buffer log item here, so processing the buffer log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) * item must remain optional.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) if (bp->b_log_item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) xfs_buf_item_done(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) if (bp->b_flags & _XBF_INODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) xfs_buf_inode_iodone(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) else if (bp->b_flags & _XBF_DQUOTS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) xfs_buf_dquot_iodone(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) _XBF_LOGRECOVERY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) if (bp->b_flags & XBF_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) complete(&bp->b_iowait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) xfs_buf_ioend_work(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) struct xfs_buf *bp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) container_of(work, xfs_buf_t, b_ioend_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) xfs_buf_ioend(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) xfs_buf_ioend_async(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) __xfs_buf_ioerror(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) xfs_buf_t *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) int error,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) xfs_failaddr_t failaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) ASSERT(error <= 0 && error >= -1000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) bp->b_error = error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) trace_xfs_buf_ioerror(bp, error, failaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) xfs_buf_ioerror_alert(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) xfs_failaddr_t func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) func, (uint64_t)XFS_BUF_ADDR(bp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) bp->b_length, -bp->b_error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * To simulate an I/O failure, the buffer must be locked and held with at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) * three references. The LRU reference is dropped by the stale call. The buf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * item reference is dropped via ioend processing. The third reference is owned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) xfs_buf_ioend_fail(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) bp->b_flags &= ~XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) xfs_buf_stale(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) xfs_buf_ioerror(bp, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) xfs_buf_ioend(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) xfs_bwrite(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) ASSERT(xfs_buf_islocked(bp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) bp->b_flags |= XBF_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) XBF_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) error = xfs_buf_submit(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) xfs_buf_bio_end_io(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) if (!bio->bi_status &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * don't overwrite existing errors - otherwise we can lose errors on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * buffers that require multiple bios to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) if (bio->bi_status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) int error = blk_status_to_errno(bio->bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) cmpxchg(&bp->b_io_error, 0, error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) xfs_buf_ioend_async(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) xfs_buf_ioapply_map(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) int map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) int *buf_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) int *count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) int op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) int page_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) int total_nr_pages = bp->b_page_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) int nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) sector_t sector = bp->b_maps[map].bm_bn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) int size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) int offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) /* skip the pages in the buffer before the start offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) page_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) offset = *buf_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) while (offset >= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) page_index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) offset -= PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * Limit the IO size to the length of the current vector, and update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) * remaining IO count for the next time around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) *count -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) *buf_offset += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) next_chunk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) atomic_inc(&bp->b_io_remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) bio = bio_alloc(GFP_NOIO, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) bio_set_dev(bio, bp->b_target->bt_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) bio->bi_iter.bi_sector = sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) bio->bi_end_io = xfs_buf_bio_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) bio->bi_private = bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) bio->bi_opf = op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) for (; size && nr_pages; nr_pages--, page_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) int rbytes, nbytes = PAGE_SIZE - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) if (nbytes > size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) nbytes = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) if (rbytes < nbytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) sector += BTOBB(nbytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) size -= nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) total_nr_pages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) if (likely(bio->bi_iter.bi_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) if (xfs_buf_is_vmapped(bp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) flush_kernel_vmap_range(bp->b_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) xfs_buf_vmap_len(bp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) if (size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) goto next_chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) * This is guaranteed not to be the last io reference count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) * because the caller (xfs_buf_submit) holds a count itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) atomic_dec(&bp->b_io_remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) xfs_buf_ioerror(bp, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) _xfs_buf_ioapply(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) int op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) int offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) int size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) * Make sure we capture only current IO errors rather than stale errors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) * left over from previous use of the buffer (e.g. failed readahead).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) bp->b_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) if (bp->b_flags & XBF_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) op = REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) * Run the write verifier callback function if it exists. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) * this function fails it will mark the buffer with an error and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) * the IO should not be dispatched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) if (bp->b_ops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) bp->b_ops->verify_write(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) if (bp->b_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) xfs_force_shutdown(bp->b_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) SHUTDOWN_CORRUPT_INCORE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) struct xfs_mount *mp = bp->b_mount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) * non-crc filesystems don't attach verifiers during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) * log recovery, so don't warn for such filesystems.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (xfs_sb_version_hascrc(&mp->m_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) xfs_warn(mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) "%s: no buf ops on daddr 0x%llx len %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) __func__, bp->b_bn, bp->b_length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) xfs_hex_dump(bp->b_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) XFS_CORRUPTION_DUMP_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) op = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) if (bp->b_flags & XBF_READ_AHEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) op |= REQ_RAHEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) /* we only use the buffer cache for meta-data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) op |= REQ_META;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * Walk all the vectors issuing IO on them. Set up the initial offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * into the buffer and the desired IO size before we start -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * _xfs_buf_ioapply_vec() will modify them appropriately for each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * subsequent call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) offset = bp->b_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) size = BBTOB(bp->b_length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) for (i = 0; i < bp->b_map_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) xfs_buf_ioapply_map(bp, i, &offset, &size, op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) if (bp->b_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (size <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) break; /* all done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * Wait for I/O completion of a sync buffer and return the I/O error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) xfs_buf_iowait(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) struct xfs_buf *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) ASSERT(!(bp->b_flags & XBF_ASYNC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) trace_xfs_buf_iowait(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) wait_for_completion(&bp->b_iowait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) trace_xfs_buf_iowait_done(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) return bp->b_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) * Buffer I/O submission path, read or write. Asynchronous submission transfers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) * the buffer lock ownership and the current reference to the IO. It is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) * safe to reference the buffer after a call to this function unless the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) * holds an additional reference itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) __xfs_buf_submit(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) bool wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) trace_xfs_buf_submit(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) /* on shutdown we stale and complete the buffer immediately */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) xfs_buf_ioend_fail(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) * Grab a reference so the buffer does not go away underneath us. For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) * async buffers, I/O completion drops the callers reference, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * could occur before submission returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) xfs_buf_hold(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) if (bp->b_flags & XBF_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) xfs_buf_wait_unpin(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) /* clear the internal error state to avoid spurious errors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) bp->b_io_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) * Set the count to 1 initially, this will stop an I/O completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) * callout which happens before we have started all the I/O from calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * xfs_buf_ioend too early.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) atomic_set(&bp->b_io_remaining, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) if (bp->b_flags & XBF_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) xfs_buf_ioacct_inc(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) _xfs_buf_ioapply(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) * If _xfs_buf_ioapply failed, we can get back here with only the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * reference we took above. If we drop it to zero, run completion so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) * that we don't return to the caller with completion still pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) xfs_buf_ioend(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) xfs_buf_ioend_async(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) error = xfs_buf_iowait(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) * Release the hold that keeps the buffer referenced for the entire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * I/O. Note that if the buffer is async, it is not safe to reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) * after this release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) xfs_buf_rele(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) void *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) xfs_buf_offset(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) size_t offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) if (bp->b_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) return bp->b_addr + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) offset += bp->b_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) page = bp->b_pages[offset >> PAGE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) return page_address(page) + (offset & (PAGE_SIZE-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) xfs_buf_zero(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) size_t boff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) size_t bsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) size_t bend;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) bend = boff + bsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) while (boff < bend) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) int page_index, page_offset, csize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) page = bp->b_pages[page_index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) csize = min_t(size_t, PAGE_SIZE - page_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) BBTOB(bp->b_length) - boff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) ASSERT((csize + page_offset) <= PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) memset(page_address(page) + page_offset, 0, csize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) boff += csize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) * Log a message about and stale a buffer that a caller has decided is corrupt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) * This function should be called for the kinds of metadata corruption that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) * cannot be detect from a verifier, such as incorrect inter-block relationship
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) * data. Do /not/ call this function from a verifier function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) * be marked stale, but b_error will not be set. The caller is responsible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * releasing the buffer or fixing it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) __xfs_buf_mark_corrupt(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) xfs_failaddr_t fa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) ASSERT(bp->b_flags & XBF_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) xfs_buf_corruption_error(bp, fa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) xfs_buf_stale(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) * Handling of buffer targets (buftargs).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * Wait for any bufs with callbacks that have been submitted but have not yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) * returned. These buffers will have an elevated hold count, so wait on those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) * while freeing all the buffers only held by the LRU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) static enum lru_status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) xfs_buftarg_wait_rele(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) struct list_head *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) struct list_lru_one *lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) spinlock_t *lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) struct list_head *dispose = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) if (atomic_read(&bp->b_hold) > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) /* need to wait, so skip it this pass */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) return LRU_SKIP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) if (!spin_trylock(&bp->b_lock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) return LRU_SKIP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) * clear the LRU reference count so the buffer doesn't get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) * ignored in xfs_buf_rele().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) atomic_set(&bp->b_lru_ref, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) bp->b_state |= XFS_BSTATE_DISPOSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) list_lru_isolate_move(lru, item, dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) return LRU_REMOVED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) xfs_wait_buftarg(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) struct xfs_buftarg *btp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) LIST_HEAD(dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) int loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) bool write_fail = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * First wait on the buftarg I/O count for all in-flight buffers to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) * released. This is critical as new buffers do not make the LRU until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) * they are released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) * Next, flush the buffer workqueue to ensure all completion processing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) * has finished. Just waiting on buffer locks is not sufficient for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) * async IO as the reference count held over IO is not released until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) * after the buffer lock is dropped. Hence we need to ensure here that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) * all reference counts have been dropped before we start walking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) * LRU list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) while (percpu_counter_sum(&btp->bt_io_count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) delay(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) flush_workqueue(btp->bt_mount->m_buf_workqueue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) /* loop until there is nothing left on the lru list. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) while (list_lru_count(&btp->bt_lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) &dispose, LONG_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) while (!list_empty(&dispose)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) list_del_init(&bp->b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) if (bp->b_flags & XBF_WRITE_FAIL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) write_fail = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) xfs_buf_alert_ratelimited(bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) "XFS: Corruption Alert",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) (long long)bp->b_bn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) xfs_buf_rele(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (loop++ != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) delay(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) * If one or more failed buffers were freed, that means dirty metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) * was thrown away. This should only ever happen after I/O completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) * handling has elevated I/O error(s) to permanent failures and shuts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) * down the fs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) if (write_fail) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) xfs_alert(btp->bt_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) "Please run xfs_repair to determine the extent of the problem.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) static enum lru_status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) xfs_buftarg_isolate(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) struct list_head *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) struct list_lru_one *lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) spinlock_t *lru_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) struct list_head *dispose = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) * we are inverting the lru lock/bp->b_lock here, so use a trylock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * If we fail to get the lock, just skip it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (!spin_trylock(&bp->b_lock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) return LRU_SKIP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) * Decrement the b_lru_ref count unless the value is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) * zero. If the value is already zero, we need to reclaim the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) * buffer, otherwise it gets another trip through the LRU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) return LRU_ROTATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) bp->b_state |= XFS_BSTATE_DISPOSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) list_lru_isolate_move(lru, item, dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) spin_unlock(&bp->b_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) return LRU_REMOVED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) xfs_buftarg_shrink_scan(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) struct shrinker *shrink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) struct shrink_control *sc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) struct xfs_buftarg *btp = container_of(shrink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) struct xfs_buftarg, bt_shrinker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) LIST_HEAD(dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) unsigned long freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) freed = list_lru_shrink_walk(&btp->bt_lru, sc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) xfs_buftarg_isolate, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) while (!list_empty(&dispose)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) list_del_init(&bp->b_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) xfs_buf_rele(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) return freed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) xfs_buftarg_shrink_count(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) struct shrinker *shrink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) struct shrink_control *sc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) struct xfs_buftarg *btp = container_of(shrink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) struct xfs_buftarg, bt_shrinker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) return list_lru_shrink_count(&btp->bt_lru, sc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) xfs_free_buftarg(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) struct xfs_buftarg *btp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) unregister_shrinker(&btp->bt_shrinker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) percpu_counter_destroy(&btp->bt_io_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) list_lru_destroy(&btp->bt_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) xfs_blkdev_issue_flush(btp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) kmem_free(btp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) xfs_setsize_buftarg(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) xfs_buftarg_t *btp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) unsigned int sectorsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) /* Set up metadata sector size info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) btp->bt_meta_sectorsize = sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) btp->bt_meta_sectormask = sectorsize - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) if (set_blocksize(btp->bt_bdev, sectorsize)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) xfs_warn(btp->bt_mount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) "Cannot set_blocksize to %u on device %pg",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) sectorsize, btp->bt_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) /* Set up device logical sector size mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) * When allocating the initial buffer target we have not yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) * read in the superblock, so don't know what sized sectors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) * are being used at this early stage. Play safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) xfs_setsize_buftarg_early(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) xfs_buftarg_t *btp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) struct block_device *bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) xfs_buftarg_t *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) xfs_alloc_buftarg(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) struct block_device *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) struct dax_device *dax_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) xfs_buftarg_t *btp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) btp->bt_mount = mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) btp->bt_dev = bdev->bd_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) btp->bt_bdev = bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) btp->bt_daxdev = dax_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) * Buffer IO error rate limiting. Limit it to no more than 10 messages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) * per 30 seconds so as to not spam logs too much on repeated errors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) DEFAULT_RATELIMIT_BURST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) if (xfs_setsize_buftarg_early(btp, bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) goto error_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) if (list_lru_init(&btp->bt_lru))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) goto error_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) goto error_lru;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) btp->bt_shrinker.seeks = DEFAULT_SEEKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) if (register_shrinker(&btp->bt_shrinker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) goto error_pcpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) return btp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) error_pcpu:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) percpu_counter_destroy(&btp->bt_io_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) error_lru:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) list_lru_destroy(&btp->bt_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) error_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) kmem_free(btp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) * Cancel a delayed write list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) * Remove each buffer from the list, clear the delwri queue flag and drop the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) * associated buffer reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) xfs_buf_delwri_cancel(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) while (!list_empty(list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) bp = list_first_entry(list, struct xfs_buf, b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) xfs_buf_lock(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) bp->b_flags &= ~_XBF_DELWRI_Q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) list_del_init(&bp->b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * Add a buffer to the delayed write list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) * This queues a buffer for writeout if it hasn't already been. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) * neither this routine nor the buffer list submission functions perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * any internal synchronization. It is expected that the lists are thread-local
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) * to the callers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) * Returns true if we queued up the buffer, or false if it already had
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) * been on the buffer list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) xfs_buf_delwri_queue(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) ASSERT(xfs_buf_islocked(bp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) ASSERT(!(bp->b_flags & XBF_READ));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * If the buffer is already marked delwri it already is queued up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * by someone else for imediate writeout. Just ignore it in that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) if (bp->b_flags & _XBF_DELWRI_Q) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) trace_xfs_buf_delwri_queued(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) trace_xfs_buf_delwri_queue(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) * If a buffer gets written out synchronously or marked stale while it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) * is on a delwri list we lazily remove it. To do this, the other party
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) * It remains referenced and on the list. In a rare corner case it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) * might get readded to a delwri list after the synchronous writeout, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) * which case we need just need to re-add the flag here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) bp->b_flags |= _XBF_DELWRI_Q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) if (list_empty(&bp->b_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) atomic_inc(&bp->b_hold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) list_add_tail(&bp->b_list, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) * Compare function is more complex than it needs to be because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * the return value is only 32 bits and we are doing comparisons
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) * on 64 bit values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) xfs_buf_cmp(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) void *priv,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) struct list_head *a,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) struct list_head *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) xfs_daddr_t diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) if (diff < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if (diff > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) * Submit buffers for write. If wait_list is specified, the buffers are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) * submitted using sync I/O and placed on the wait list such that the caller can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) * iowait each buffer. Otherwise async I/O is used and the buffers are released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) * at I/O completion time. In either case, buffers remain locked until I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) * completes and the buffer is released from the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) xfs_buf_delwri_submit_buffers(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) struct list_head *buffer_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) struct list_head *wait_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) struct xfs_buf *bp, *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) int pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) list_sort(NULL, buffer_list, xfs_buf_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) list_for_each_entry_safe(bp, n, buffer_list, b_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) if (!wait_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) if (xfs_buf_ispinned(bp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) pinned++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) if (!xfs_buf_trylock(bp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) xfs_buf_lock(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) * Someone else might have written the buffer synchronously or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * marked it stale in the meantime. In that case only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) * _XBF_DELWRI_Q flag got cleared, and we have to drop the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) * reference and remove it from the list here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) if (!(bp->b_flags & _XBF_DELWRI_Q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) list_del_init(&bp->b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) trace_xfs_buf_delwri_split(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * If we have a wait list, each buffer (and associated delwri
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * queue reference) transfers to it and is submitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) * synchronously. Otherwise, drop the buffer from the delwri
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) * queue and submit async.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) bp->b_flags &= ~_XBF_DELWRI_Q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) bp->b_flags |= XBF_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) if (wait_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) bp->b_flags &= ~XBF_ASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) list_move_tail(&bp->b_list, wait_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) bp->b_flags |= XBF_ASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) list_del_init(&bp->b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) __xfs_buf_submit(bp, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) return pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) * Write out a buffer list asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) * This will take the @buffer_list, write all non-locked and non-pinned buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) * out and not wait for I/O completion on any of the buffers. This interface
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) * is only safely useable for callers that can track I/O completion by higher
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) * level means, e.g. AIL pushing as the @buffer_list is consumed in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) * function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) * Note: this function will skip buffers it would block on, and in doing so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) * leaves them on @buffer_list so they can be retried on a later pass. As such,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) * it is up to the caller to ensure that the buffer list is fully submitted or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) * cancelled appropriately when they are finished with the list. Failure to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) * cancel or resubmit the list until it is empty will result in leaked buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) * at unmount time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) xfs_buf_delwri_submit_nowait(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) struct list_head *buffer_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * Write out a buffer list synchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) * This will take the @buffer_list, write all buffers out and wait for I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) * completion on all of the buffers. @buffer_list is consumed by the function,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) * so callers must have some other way of tracking buffers if they require such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) * functionality.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) xfs_buf_delwri_submit(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) struct list_head *buffer_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) LIST_HEAD (wait_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) int error = 0, error2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) struct xfs_buf *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) /* Wait for IO to complete. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) while (!list_empty(&wait_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) list_del_init(&bp->b_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) * Wait on the locked buffer, check for errors and unlock and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) * release the delwri queue reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) error2 = xfs_buf_iowait(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) xfs_buf_relse(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) if (!error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) error = error2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) * Push a single buffer on a delwri queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) * The purpose of this function is to submit a single buffer of a delwri queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) * and return with the buffer still on the original queue. The waiting delwri
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * buffer submission infrastructure guarantees transfer of the delwri queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * buffer reference to a temporary wait list. We reuse this infrastructure to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * transfer the buffer back to the original queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * Note the buffer transitions from the queued state, to the submitted and wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) * listed state and back to the queued state during this call. The buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) * locking and queue management logic between _delwri_pushbuf() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) * _delwri_queue() guarantee that the buffer cannot be queued to another list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) * before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) xfs_buf_delwri_pushbuf(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) struct list_head *buffer_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) LIST_HEAD (submit_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) ASSERT(bp->b_flags & _XBF_DELWRI_Q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) * Isolate the buffer to a new local list so we can submit it for I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) * independently from the rest of the original list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) xfs_buf_lock(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) list_move(&bp->b_list, &submit_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) xfs_buf_unlock(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) * Delwri submission clears the DELWRI_Q buffer flag and returns with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) * the buffer on the wait list with the original reference. Rather than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) * bounce the buffer from a local wait list back to the original list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) * after I/O completion, reuse the original list as the wait list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) * The buffer is now locked, under I/O and wait listed on the original
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) * return with the buffer unlocked and on the original queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) error = xfs_buf_iowait(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) bp->b_flags |= _XBF_DELWRI_Q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) xfs_buf_unlock(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) int __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) xfs_buf_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) SLAB_HWCACHE_ALIGN |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) SLAB_RECLAIM_ACCOUNT |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) SLAB_MEM_SPREAD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) if (!xfs_buf_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) xfs_buf_terminate(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) kmem_cache_destroy(xfs_buf_zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) * Set the lru reference count to 0 based on the error injection tag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) * This allows userspace to disrupt buffer caching for debug/testing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) * purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) lru_ref = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) atomic_set(&bp->b_lru_ref, lru_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) * Verify an on-disk magic value against the magic value specified in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) * verifier structure. The verifier magic is in disk byte order so the caller is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) * expected to pass the value directly from disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) xfs_verify_magic(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) __be32 dmagic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) struct xfs_mount *mp = bp->b_mount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) idx = xfs_sb_version_hascrc(&mp->m_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) return dmagic == bp->b_ops->magic[idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) * Verify an on-disk magic value against the magic value specified in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) * verifier structure. The verifier magic is in disk byte order so the caller is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) * expected to pass the value directly from disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) xfs_verify_magic16(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) struct xfs_buf *bp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) __be16 dmagic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) struct xfs_mount *mp = bp->b_mount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) idx = xfs_sb_version_hascrc(&mp->m_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) return dmagic == bp->b_ops->magic16[idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) }