// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_extent_busy.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_trace.h"

struct workqueue_struct *xfs_discard_wq;

/*
 * Allocate a new ticket. Failing to get a new ticket makes it really hard to
 * recover, so we don't allow failure here. Also, we allocate in a context that
 * we don't want to be issuing transactions from, so we need to tell the
 * allocation code this as well.
 *
 * We don't reserve any space for the ticket - we are going to steal whatever
 * space we require from transactions as they commit. To ensure we reserve all
 * the space required, we need to set the current reservation of the ticket to
 * zero so that we know to steal the initial transaction overhead from the
 * first transaction commit.
 */
static struct xlog_ticket *
xlog_cil_ticket_alloc(
	struct xlog	*log)
{
	struct xlog_ticket *tic;

	tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0);

	/*
	 * set the current reservation to zero so we know to steal the basic
	 * transaction overhead reservation from the first transaction commit.
	 */
	tic->t_curr_res = 0;
	return tic;
}

/*
 * After the first stage of log recovery is done, we know where the head and
 * tail of the log are. We need this log initialisation done before we can
 * initialise the first CIL checkpoint context.
 *
 * Here we allocate a log ticket to track space usage during a CIL push. This
 * ticket is passed to xlog_write() directly so that we don't slowly leak log
 * space by failing to account for space used by log headers and additional
 * region headers for split regions.
 */
void
xlog_cil_init_post_recovery(
	struct xlog	*log)
{
	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
	log->l_cilp->xc_ctx->sequence = 1;
}

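/*
 * Space consumed by a log vector header and its trailing iovec array, rounded
 * up to a 64-bit boundary so that the data region that follows the iovec
 * array is naturally aligned.
 */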
static inline int
xlog_cil_iovec_space(
	uint	niovecs)
{
	return round_up((sizeof(struct xfs_log_vec) +
					niovecs * sizeof(struct xfs_log_iovec)),
			sizeof(uint64_t));
}

/*
 * Allocate or pin log vector buffers for CIL insertion.
 *
 * The CIL currently uses disposable buffers for copying a snapshot of the
 * modified items into the log during a push. The biggest problem with this is
 * the requirement to allocate the disposable buffer during the commit if:
 *	a) it does not exist; or
 *	b) it is too small
 *
 * If we do this allocation within xlog_cil_insert_format_items(), it is done
 * under the xc_ctx_lock, which means that a CIL push cannot occur during
 * the memory allocation. This means that we have a potential deadlock situation
 * under low memory conditions when we have lots of dirty metadata pinned in
 * the CIL and we need a CIL commit to occur to free memory.
 *
 * To avoid this, we need to move the memory allocation outside the
 * xc_ctx_lock, but because the log vector buffers are disposable, that opens
 * up a TOCTOU race condition w.r.t. the CIL committing and removing the log
 * vector buffers between the check and the formatting of the item into the
 * log vector buffer within the xc_ctx_lock.
 *
 * Because the log vector buffer needs to be unchanged during the CIL push
 * process, we cannot share the buffer between the transaction commit (which
 * modifies the buffer) and the CIL push context that is writing the changes
 * into the log. This means skipping preallocation of buffer space is
 * unreliable, but we most definitely do not want to be allocating and freeing
 * buffers unnecessarily during commits when overwrites can be done safely.
 *
 * The simplest solution to this problem is to allocate a shadow buffer when a
 * log item is committed for the second time, and then to only use this buffer
 * if necessary. The buffer can remain attached to the log item until such time
 * it is needed, and this is the buffer that is reallocated to match the size of
 * the incoming modification. Then during the formatting of the item we can swap
 * the active buffer with the new one if we can't reuse the existing buffer. We
 * don't free the old buffer as it may be reused on the next modification if
 * its size is right, otherwise we'll free and reallocate it at that point.
 *
 * This function builds a vector for the changes in each log item in the
 * transaction. It then works out the length of the buffer needed for each log
 * item, allocates them and attaches the vector to the log item in preparation
 * for the formatting step which occurs under the xc_ctx_lock.
 *
 * While this means the memory footprint goes up, it avoids the repeated
 * alloc/free pattern that repeated modifications of an item would otherwise
 * cause, and hence minimises the CPU overhead of such behaviour.
 */
static void
xlog_cil_alloc_shadow_bufs(
	struct xlog		*log,
	struct xfs_trans	*tp)
{
	struct xfs_log_item	*lip;

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		struct xfs_log_vec *lv;
		int	niovecs = 0;
		int	nbytes = 0;
		int	buf_size;
		bool	ordered = false;

		/* Skip items which aren't dirty in this transaction. */
		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
			continue;

		/* get number of vecs and size of data to be stored */
		lip->li_ops->iop_size(lip, &niovecs, &nbytes);

		/*
		 * Ordered items need to be tracked but we do not wish to write
		 * them. We need a logvec to track the object, but we do not
		 * need an iovec or buffer to be allocated for copying data.
		 */
		if (niovecs == XFS_LOG_VEC_ORDERED) {
			ordered = true;
			niovecs = 0;
			nbytes = 0;
		}

		/*
		 * We 64-bit align the length of each iovec so that the start
		 * of the next one is naturally aligned. We'll need to
		 * account for that slack space here. Then round nbytes up
		 * to 64-bit alignment so that the initial buffer alignment is
		 * easy to calculate and verify.
		 */
		nbytes += niovecs * sizeof(uint64_t);
		nbytes = round_up(nbytes, sizeof(uint64_t));
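		/*
		 * e.g. a 12 byte region is padded out to 16 bytes when
		 * formatted, so the extra sizeof(uint64_t) we add per iovec
		 * above covers the worst case padding of each region.
		 */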

		/*
		 * The data buffer needs to start 64-bit aligned, so round up
		 * that space to ensure we can align it appropriately and not
		 * overrun the buffer.
		 */
		buf_size = nbytes + xlog_cil_iovec_space(niovecs);
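
		/*
		 * The resulting shadow buffer is a single allocation laid out
		 * as:
		 *
		 *	[ struct xfs_log_vec | iovec array | data regions ]
		 *	^lv                   ^lv_iovecp    ^lv_buf
		 *
		 * with the iovec array and data region both 64-bit aligned.
		 */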

		/*
		 * if we have no shadow buffer, or it is too small, we need to
		 * reallocate it.
		 */
		if (!lip->li_lv_shadow ||
		    buf_size > lip->li_lv_shadow->lv_size) {

			/*
			 * We free and allocate here as a realloc would copy
			 * unnecessary data. We don't use kmem_zalloc() for the
			 * same reason - we don't need to zero the data area in
			 * the buffer, only the log vector header and the iovec
			 * storage.
			 */
			kmem_free(lip->li_lv_shadow);

			lv = kmem_alloc_large(buf_size, KM_NOFS);
			memset(lv, 0, xlog_cil_iovec_space(niovecs));

			lv->lv_item = lip;
			lv->lv_size = buf_size;
			if (ordered)
				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
			else
				lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
			lip->li_lv_shadow = lv;
		} else {
			/* same or smaller, optimise common overwrite case */
			lv = lip->li_lv_shadow;
			if (ordered)
				lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
			else
				lv->lv_buf_len = 0;
			lv->lv_bytes = 0;
			lv->lv_next = NULL;
		}

		/* Ensure the lv is set up according to ->iop_size */
		lv->lv_niovecs = niovecs;

		/* The allocated data region lies beyond the iovec region */
		lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs);
	}
}

/*
 * Prepare the log item for insertion into the CIL. Calculate the difference in
 * log space and vectors it will consume, and if it is a new item pin it as
 * well.
 */
STATIC void
xfs_cil_prepare_item(
	struct xlog		*log,
	struct xfs_log_vec	*lv,
	struct xfs_log_vec	*old_lv,
	int			*diff_len,
	int			*diff_iovecs)
{
	/* Account for the new LV being passed in */
	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
		*diff_len += lv->lv_bytes;
		*diff_iovecs += lv->lv_niovecs;
	}

	/*
	 * If there is no old LV, this is the first time we've seen the item in
	 * this CIL context and so we need to pin it. If we are replacing the
	 * old_lv, then remove the space it accounts for and make it the shadow
	 * buffer for later freeing. In both cases we are now switching to the
	 * shadow buffer, so update the pointer to it appropriately.
	 */
	if (!old_lv) {
		if (lv->lv_item->li_ops->iop_pin)
			lv->lv_item->li_ops->iop_pin(lv->lv_item);
		lv->lv_item->li_lv_shadow = NULL;
	} else if (old_lv != lv) {
		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);

		*diff_len -= old_lv->lv_bytes;
		*diff_iovecs -= old_lv->lv_niovecs;
		lv->lv_item->li_lv_shadow = old_lv;
	}

	/* attach new log vector to log item */
	lv->lv_item->li_lv = lv;

	/*
	 * If this is the first time the item is being committed to the
	 * CIL, store the sequence number on the log item so we can
	 * tell in future commits whether this is the first checkpoint
	 * the item is being committed into.
	 */
	if (!lv->lv_item->li_seq)
		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
}

/*
 * Format log item into a flat buffer
 *
 * For delayed logging, we need to hold a formatted buffer containing all the
 * changes on the log item. This enables us to relog the item in memory and
 * write it out asynchronously without needing to relock the object that was
 * modified at the time it gets written into the iclog.
 *
 * This function takes the prepared log vectors attached to each log item, and
 * formats the changes into the log vector buffer. The buffer it uses is
 * dependent on the current state of the vector in the CIL - the shadow lv is
 * guaranteed to be large enough for the current modification, but we will only
 * use that if we can't reuse the existing lv. If we can't reuse the existing
 * lv, then simply swap it out for the shadow lv. We don't free it - that is
 * done lazily either by the next modification or the freeing of the log item.
 *
 * We don't set up region headers during this process; we simply copy the
 * regions into the flat buffer. We can do this because we still have to do a
 * formatting step to write the regions into the iclog buffer. Writing the
 * ophdrs during the iclog write means that we can support splitting large
 * regions across iclog boundaries without needing a change in the format of
 * the item/region encapsulation.
 *
 * Hence what we need to do now is rewrite the vector array to point to the
 * copied region inside the buffer we just allocated. This allows us to format
 * the regions into the iclog as though they are being formatted directly out
 * of the objects themselves.
 */
static void
xlog_cil_insert_format_items(
	struct xlog		*log,
	struct xfs_trans	*tp,
	int			*diff_len,
	int			*diff_iovecs)
{
	struct xfs_log_item	*lip;

	/* Bail out if we didn't find a log item. */
	if (list_empty(&tp->t_items)) {
		ASSERT(0);
		return;
	}

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		struct xfs_log_vec *lv;
		struct xfs_log_vec *old_lv = NULL;
		struct xfs_log_vec *shadow;
		bool	ordered = false;

		/* Skip items which aren't dirty in this transaction. */
		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
			continue;

		/*
		 * The formatting size information is already attached to
		 * the shadow lv on the log item.
		 */
		shadow = lip->li_lv_shadow;
		if (shadow->lv_buf_len == XFS_LOG_VEC_ORDERED)
			ordered = true;

		/* Skip items that do not have any vectors for writing */
		if (!shadow->lv_niovecs && !ordered)
			continue;

		/* compare to existing item size */
		old_lv = lip->li_lv;
		if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
			/* same or smaller, optimise common overwrite case */
			lv = lip->li_lv;
			lv->lv_next = NULL;

			if (ordered)
				goto insert;

			/*
			 * set the item up as though it is a new insertion so
			 * that the space reservation accounting is correct.
			 */
			*diff_iovecs -= lv->lv_niovecs;
			*diff_len -= lv->lv_bytes;

			/* Ensure the lv is set up according to ->iop_size */
			lv->lv_niovecs = shadow->lv_niovecs;

			/* reset the lv buffer information for new formatting */
			lv->lv_buf_len = 0;
			lv->lv_bytes = 0;
			lv->lv_buf = (char *)lv +
					xlog_cil_iovec_space(lv->lv_niovecs);
		} else {
			/* switch to shadow buffer! */
			lv = shadow;
			lv->lv_item = lip;
			if (ordered) {
				/* track as an ordered logvec */
				ASSERT(lip->li_lv == NULL);
				goto insert;
			}
		}

		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
		lip->li_ops->iop_format(lip, lv);
insert:
		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
	}
}

/*
 * Insert the log items into the CIL and calculate the difference in space
 * consumed by the item. Add the space to the checkpoint ticket and calculate
 * if the change requires additional log metadata. If it does, take that space
 * as well. Remove the amount of space we added to the checkpoint ticket from
 * the current transaction ticket so that the accounting works out correctly.
 */
static void
xlog_cil_insert_items(
	struct xlog		*log,
	struct xfs_trans	*tp)
{
	struct xfs_cil		*cil = log->l_cilp;
	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
	struct xfs_log_item	*lip;
	int			len = 0;
	int			diff_iovecs = 0;
	int			iclog_space;
	int			iovhdr_res = 0, split_res = 0, ctx_res = 0;

	ASSERT(tp);

	/*
	 * We can do this safely because the context can't checkpoint until we
	 * are done so it doesn't matter exactly how we update the CIL.
	 */
	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);

	spin_lock(&cil->xc_cil_lock);

	/* account for space used by new iovec headers */
	iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
	len += iovhdr_res;
	ctx->nvecs += diff_iovecs;

	/* attach the transaction to the CIL if it has any busy extents */
	if (!list_empty(&tp->t_busy))
		list_splice_init(&tp->t_busy, &ctx->busy_extents);

	/*
	 * Now transfer enough transaction reservation to the context ticket
	 * for the checkpoint. The context ticket is special - the unit
	 * reservation has to grow as well as the current reservation as we
	 * steal from tickets so we can correctly determine the space used
	 * during the transaction commit.
	 */
	if (ctx->ticket->t_curr_res == 0) {
		ctx_res = ctx->ticket->t_unit_res;
		ctx->ticket->t_curr_res = ctx_res;
		tp->t_ticket->t_curr_res -= ctx_res;
	}

	/* do we need space for more log record headers? */
	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
	if (len > 0 && (ctx->space_used / iclog_space !=
				(ctx->space_used + len) / iclog_space)) {
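		/*
		 * Worst case, the new len bytes cross an iclog boundary every
		 * iclog_space bytes, and each crossing needs another log
		 * record header plus an op header to continue the split
		 * region in the next iclog.
		 */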
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) split_res = (len + iclog_space - 1) / iclog_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) /* need to take into account split region headers, too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) ctx->ticket->t_unit_res += split_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) ctx->ticket->t_curr_res += split_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) tp->t_ticket->t_curr_res -= split_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) ASSERT(tp->t_ticket->t_curr_res >= len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) tp->t_ticket->t_curr_res -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) ctx->space_used += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) * If we've overrun the reservation, dump the tx details before we move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) * the log items. Shutdown is imminent...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) xfs_warn(log->l_mp, "Transaction log reservation overrun:");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) xfs_warn(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) " log items: %d bytes (iov hdrs: %d bytes)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) len, iovhdr_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) xfs_warn(log->l_mp, " split region headers: %d bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) split_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) xlog_print_trans(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) * Now (re-)position everything modified at the tail of the CIL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) * We do this here so we only need to take the CIL lock once during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) * the transaction commit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) list_for_each_entry(lip, &tp->t_items, li_trans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) /* Skip items which aren't dirty in this transaction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) * Only move the item if it isn't already at the tail. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) * to prevent a transient list_empty() state when reinserting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) * an item that is already the only item in the CIL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) if (!list_is_last(&lip->li_cil, &cil->xc_cil))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) list_move_tail(&lip->li_cil, &cil->xc_cil);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) spin_unlock(&cil->xc_cil_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) if (tp->t_ticket->t_curr_res < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) xlog_cil_free_logvec(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) struct xfs_log_vec *log_vector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) struct xfs_log_vec *lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) for (lv = log_vector; lv; ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) struct xfs_log_vec *next = lv->lv_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) kmem_free(lv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) lv = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) xlog_discard_endio_work(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) struct xfs_cil_ctx *ctx =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) container_of(work, struct xfs_cil_ctx, discard_endio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) kmem_free(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * Queue up the actual completion to a thread to avoid IRQ-safe locking for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * pagb_lock. Note that we need a unbounded workqueue, otherwise we might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) * get the execution delayed up to 30 seconds for weird reasons.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) xlog_discard_endio(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) struct xfs_cil_ctx *ctx = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) queue_work(xfs_discard_wq, &ctx->discard_endio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) xlog_discard_busy_extents(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) struct xfs_cil_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) struct list_head *list = &ctx->busy_extents;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) struct xfs_extent_busy *busyp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) struct bio *bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) list_for_each_entry(busyp, list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) busyp->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) XFS_FSB_TO_BB(mp, busyp->length),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) GFP_NOFS, 0, &bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) if (error && error != -EOPNOTSUPP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) xfs_info(mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) "discard failed for extent [0x%llx,%u], error %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) (unsigned long long)busyp->bno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) busyp->length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) if (bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) bio->bi_private = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) bio->bi_end_io = xlog_discard_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) xlog_discard_endio_work(&ctx->discard_endio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) * Mark all items committed and clear busy extents. We free the log vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) * chains in a separate pass so that we unpin the log items as quickly as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) * possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) xlog_cil_committed(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) struct xfs_cil_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) * If the I/O failed, we're aborting the commit and already shutdown.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) * Wake any commit waiters before aborting the log items so we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) * block async log pushers on callbacks. Async log pushers explicitly do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) * not wait on log force completion because they may be holding locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) * required to unpin items.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) if (abort) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) spin_lock(&ctx->cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) wake_up_all(&ctx->cil->xc_commit_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) spin_unlock(&ctx->cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) ctx->start_lsn, abort);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) xfs_extent_busy_sort(&ctx->busy_extents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) xfs_extent_busy_clear(mp, &ctx->busy_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) spin_lock(&ctx->cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) list_del(&ctx->committing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) spin_unlock(&ctx->cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) xlog_cil_free_logvec(ctx->lv_chain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) if (!list_empty(&ctx->busy_extents))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) xlog_discard_busy_extents(mp, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) kmem_free(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) xlog_cil_process_committed(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) struct xfs_cil_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) while ((ctx = list_first_entry_or_null(list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) struct xfs_cil_ctx, iclog_entry))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) list_del(&ctx->iclog_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) xlog_cil_committed(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) * Push the Committed Item List to the log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) * If the current sequence is the same as xc_push_seq we need to do a flush. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) * xc_push_seq is less than the current sequence, then it has already been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) * flushed and we don't need to do anything - the caller will wait for it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) * complete if necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) * xc_push_seq is checked unlocked against the sequence number for a match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) * Hence we can allow log forces to run racily and not issue pushes for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) * same sequence twice. If we get a race between multiple pushes for the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) * sequence they will block on the first one and then abort, hence avoiding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) * needless pushes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) xlog_cil_push_work(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) struct xfs_cil *cil =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) container_of(work, struct xfs_cil, xc_push_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) struct xlog *log = cil->xc_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) struct xfs_log_vec *lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) struct xfs_cil_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) struct xfs_cil_ctx *new_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) struct xlog_in_core *commit_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) struct xlog_ticket *tic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) int num_iovecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) struct xfs_trans_header thdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) struct xfs_log_iovec lhdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) struct xfs_log_vec lvhdr = { NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) xfs_lsn_t commit_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) xfs_lsn_t push_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) new_ctx->ticket = xlog_cil_ticket_alloc(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) down_write(&cil->xc_ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) ctx = cil->xc_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) push_seq = cil->xc_push_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) ASSERT(push_seq <= ctx->sequence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) * Wake up any background push waiters now this context is being pushed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) wake_up_all(&cil->xc_push_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) * Check if we've anything to push. If there is nothing, then we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) * move on to a new sequence number and so we have to be able to push
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) * this sequence again later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) if (list_empty(&cil->xc_cil)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) cil->xc_push_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) goto out_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) /* check for a previously pushed sequence */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) if (push_seq < cil->xc_ctx->sequence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) goto out_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * We are now going to push this context, so add it to the committing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * list before we do anything else. This ensures that anyone waiting on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) * this push can easily detect the difference between a "push in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) * progress" and "CIL is empty, nothing to do".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) * IOWs, a wait loop can now check for:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) * the current sequence not being found on the committing list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) * an empty CIL; and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) * an unchanged sequence number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) * to detect a push that had nothing to do and therefore does not need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) * waiting on. If the CIL is not empty, we get put on the committing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) * list before emptying the CIL and bumping the sequence number. Hence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) * an empty CIL and an unchanged sequence number means we jumped out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) * above after doing nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) * Hence the waiter will either find the commit sequence on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) * committing list or the sequence number will be unchanged and the CIL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) * still dirty. In that latter case, the push has not yet started, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) * so the waiter will have to continue trying to check the CIL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) * committing list until it is found. In extreme cases of delay, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) * sequence may fully commit between the attempts the wait makes to wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) * on the commit sequence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) list_add(&ctx->committing, &cil->xc_committing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) * pull all the log vectors off the items in the CIL, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) * remove the items from the CIL. We don't need the CIL lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) * here because it's only needed on the transaction commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) * side which is currently locked out by the flush lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) lv = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) num_iovecs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) while (!list_empty(&cil->xc_cil)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) struct xfs_log_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) item = list_first_entry(&cil->xc_cil,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) struct xfs_log_item, li_cil);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) list_del_init(&item->li_cil);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) if (!ctx->lv_chain)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) ctx->lv_chain = item->li_lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) lv->lv_next = item->li_lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) lv = item->li_lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) item->li_lv = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) num_iovecs += lv->lv_niovecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) * initialise the new context and attach it to the CIL. Then attach
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) * the current context to the CIL committing list so it can be found
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * during log forces to extract the commit lsn of the sequence that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) * needs to be forced.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) INIT_LIST_HEAD(&new_ctx->committing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) INIT_LIST_HEAD(&new_ctx->busy_extents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) new_ctx->sequence = ctx->sequence + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) new_ctx->cil = cil;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) cil->xc_ctx = new_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * The switch is now done, so we can drop the context lock and move out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * of a shared context. We can't just go straight to the commit record,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * though - we need to synchronise with previous and future commits so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * that the commit records are correctly ordered in the log to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * that we process items during log IO completion in the correct order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) * For example, if we get an EFI in one checkpoint and the EFD in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) * next (e.g. due to log forces), we do not want the checkpoint with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) * the EFD to be committed before the checkpoint with the EFI. Hence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) * we must strictly order the commit records of the checkpoints so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * that: a) the checkpoint callbacks are attached to the iclogs in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) * correct order; and b) the checkpoints are replayed in correct order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) * in log recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * Hence we need to add this context to the committing context list so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * that higher sequences will wait for us to write out a commit record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) * before they do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * xfs_log_force_lsn requires us to mirror the new sequence into the cil
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) * structure atomically with the addition of this sequence to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) * committing list. This also ensures that we can do unlocked checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * against the current sequence in log forces without risking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * dereferencing a freed context pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) cil->xc_current_sequence = new_ctx->sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) up_write(&cil->xc_ctx_lock);
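	/*
	 * Sketch of the unlocked read this mirroring enables (see the
	 * ASSERT in xlog_cil_force_lsn() below): a log force may sample
	 * the current sequence without taking xc_push_lock,
	 *
	 *	ASSERT(sequence <= cil->xc_current_sequence);
	 *
	 * because it checks a sequence number rather than dereferencing a
	 * context that may already have been freed.
	 */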
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) * Build a checkpoint transaction header and write it to the log to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) * begin the transaction. We need to account for the space used by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) * transaction header here as it is not accounted for in xlog_write().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * The LSN we need to pass to the log items on transaction commit is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) * the LSN reported by the first log vector write. If we use the commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) * record lsn then we can move the tail beyond the grant write head.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) tic = ctx->ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) thdr.th_type = XFS_TRANS_CHECKPOINT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) thdr.th_tid = tic->t_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) thdr.th_num_items = num_iovecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) lhdr.i_addr = &thdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) lhdr.i_len = sizeof(xfs_trans_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
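	/*
	 * Worked example of the steal above, assuming the usual on-disk
	 * layouts (16 byte transaction header plus a 12 byte opheader):
	 * 28 bytes come out of tic->t_curr_res here, because xlog_write()
	 * does not account for the transaction header region (the header
	 * itself plus its opheader).
	 */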
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) lvhdr.lv_niovecs = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) lvhdr.lv_iovecp = &lhdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) lvhdr.lv_next = ctx->lv_chain;
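	/*
	 * The chain handed to xlog_write() below is therefore:
	 *
	 *	lvhdr (on stack, one iovec: the transaction header)
	 *	  -> first CIL item's log vector
	 *	  -> ...
	 *	  -> last CIL item's log vector
	 */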
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) goto out_abort_free_ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * now that we've written the checkpoint into the log, strictly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) * order the commit records so replay will get them in the right order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) * Avoid getting stuck in this loop because we were woken by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) * shutdown, but then went back to sleep once already in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) * shutdown state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) if (XLOG_FORCED_SHUTDOWN(log)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) goto out_abort_free_ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) * Higher sequences will wait for this one so skip them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * Don't wait for our own sequence, either.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) if (new_ctx->sequence >= ctx->sequence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) if (!new_ctx->commit_lsn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) * It is still being pushed! Wait for the push to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) * complete, then start again from the beginning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) spin_unlock(&cil->xc_push_lock);
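	/*
	 * Note on the scan above: xlog_wait() drops xc_push_lock while
	 * sleeping, so the committing list may have changed by the time
	 * we are woken. That is why the wait path uses
	 *
	 *	xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
	 *	goto restart;
	 *
	 * rather than continuing the list walk with a stale cursor.
	 */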
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) goto out_abort_free_ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) xfs_log_ticket_ungrant(log, tic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) spin_lock(&commit_iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) spin_unlock(&commit_iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) goto out_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) spin_unlock(&commit_iclog->ic_callback_lock);
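	/*
	 * From this point the iclog owns the context: when the iclog IO
	 * completes, callback processing walks ic_callbacks and ends up in
	 * xlog_cil_committed() for this ctx, which unpins the items,
	 * processes the busy extents and eventually frees the context.
	 */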
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * now the checkpoint commit is complete and we've attached the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * callbacks to the iclog we can assign the commit LSN to the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * and wake up anyone who is waiting for the commit to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) ctx->commit_lsn = commit_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) wake_up_all(&cil->xc_commit_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) /* release the hounds! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) xfs_log_release_iclog(commit_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) out_skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) up_write(&cil->xc_ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) xfs_log_ticket_put(new_ctx->ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) kmem_free(new_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) out_abort_free_ticket:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) xfs_log_ticket_ungrant(log, tic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) out_abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) ASSERT(XLOG_FORCED_SHUTDOWN(log));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) xlog_cil_committed(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) * We need to push the CIL every so often so we don't cache more than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) * we can fit in the log. The hard limit is that a checkpoint can't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) * more than half the log (the current checkpoint is not allowed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) * overwrite the previous checkpoint), but commit latency and memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) * usage limit this to a smaller size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) xlog_cil_push_background(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) struct xlog *log) __releases(cil->xc_ctx_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) struct xfs_cil *cil = log->l_cilp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * The CIL won't be empty because we are called while holding the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) * context lock, so whatever we added to the CIL will still be there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) ASSERT(!list_empty(&cil->xc_cil));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * don't do a background push if we haven't used up all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * space available yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) up_read(&cil->xc_ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (cil->xc_push_seq < cil->xc_current_sequence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) cil->xc_push_seq = cil->xc_current_sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * Drop the context lock now, we can't hold that if we need to sleep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * because we are over the blocking threshold. The push_lock is still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * held, so blocking threshold sleep/wakeup is still correctly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * serialised here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) up_read(&cil->xc_ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) * If we are well over the space limit, throttle the work that is being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) * done until the push work on this context has begun.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) ASSERT(cil->xc_ctx->space_used < log->l_logsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) }
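/*
 * Worked example of the thresholds above (hedged; the exact macros live
 * in xfs_log_priv.h and have changed over time): if
 * XLOG_CIL_SPACE_LIMIT() works out to an eighth of the log size, a
 * 200MB log starts background pushes once the CIL holds ~25MB of dirty
 * items, while the separate, higher XLOG_CIL_BLOCKING_SPACE_LIMIT()
 * throttles committers well before the CIL could grow past the
 * half-the-log checkpoint constraint.
 */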
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * number that is passed. When it returns, the work will be queued for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * @push_seq, but it won't be completed. The caller is expected to do any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) * waiting for push_seq to complete if it is required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) xlog_cil_push_now(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) xfs_lsn_t push_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) struct xfs_cil *cil = log->l_cilp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) if (!cil)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) /* start on any pending background push to minimise wait time on it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) flush_work(&cil->xc_push_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * If the CIL is empty or we've already pushed the sequence then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * there's no work we need to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) cil->xc_push_seq = push_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) xlog_cil_empty(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) struct xfs_cil *cil = log->l_cilp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) bool empty = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) if (list_empty(&cil->xc_cil))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) empty = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) return empty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * Commit a transaction with the given vector to the Committed Item List.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) * To do this, we need to format the item, pin it in memory if required and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) * account for the space used by the transaction. Once we have done that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * need to release the unused reservation for the transaction, attach the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * transaction to the checkpoint context so we carry the busy extents through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) * to checkpoint completion, and then unlock all the items in the transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * Called with the context lock already held in read mode to lock out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * background commit, returns without it held once background commits are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * allowed again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) xfs_log_commit_cil(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) struct xfs_trans *tp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) xfs_lsn_t *commit_lsn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) bool regrant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) struct xfs_cil *cil = log->l_cilp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) struct xfs_log_item *lip, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) xfs_lsn_t xc_commit_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * Do all necessary memory allocation before we lock the CIL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * This ensures the allocation does not deadlock with a CIL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * push in memory reclaim (e.g. from kswapd).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) xlog_cil_alloc_shadow_bufs(log, tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) /* lock out background commit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) down_read(&cil->xc_ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) xlog_cil_insert_items(log, tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) xc_commit_lsn = cil->xc_ctx->sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) if (commit_lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) *commit_lsn = xc_commit_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) if (regrant && !XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) xfs_log_ticket_regrant(log, tp->t_ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) xfs_log_ticket_ungrant(log, tp->t_ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) tp->t_ticket = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) xfs_trans_unreserve_and_mod_sb(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * Once all the items of the transaction have been copied to the CIL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * the items can be unlocked and possibly freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * This needs to be done before we drop the CIL context lock because we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * have to update state in the log items and unlock them before they go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * to disk. If we don't, then the CIL checkpoint can race with us and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * we can run checkpoint completion before we've updated and unlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * the log items. This affects (at least) processing of stale buffers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) * inodes and EFIs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) trace_xfs_trans_commit_items(tp, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) xfs_trans_del_item(lip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) if (lip->li_ops->iop_committing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) lip->li_ops->iop_committing(lip, xc_commit_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) /* xlog_cil_push_background() releases cil->xc_ctx_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) xlog_cil_push_background(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
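/*
 * Usage sketch, illustrative only (cf. the transaction commit path in
 * xfs_trans.c): the wrapper name below is hypothetical.
 */
static inline void
xfs_log_commit_cil_example(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	bool			regrant)
{
	xfs_lsn_t		commit_lsn;

	xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
	/* on return the items belong to the CIL and the ctx lock is dropped */
}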
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) * Conditionally push the CIL based on the sequence passed in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) * We only need to push if we haven't already pushed the sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * number given. Hence the only time we will trigger a push here is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * if the push sequence is the same as the current context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * We return the current commit lsn to allow the callers to determine if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * an iclog flush is necessary following this call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) xfs_lsn_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) xlog_cil_force_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) xfs_lsn_t sequence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) struct xfs_cil *cil = log->l_cilp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) struct xfs_cil_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) xfs_lsn_t commit_lsn = NULLCOMMITLSN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) ASSERT(sequence <= cil->xc_current_sequence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * check to see if we need to force out the current context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * xlog_cil_push() handles racing pushes for the same sequence,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * so no need to deal with it here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) xlog_cil_push_now(log, sequence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) * See if we can find a previous sequence still committing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) * We need to wait for all previous sequence commits to complete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) * before allowing the force of push_seq to go ahead. Hence block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) * on commits for those as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) spin_lock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) list_for_each_entry(ctx, &cil->xc_committing, committing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * Avoid getting stuck in this loop because we were woken by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * shutdown, but then went back to sleep once already in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * shutdown state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) if (XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) goto out_shutdown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (ctx->sequence > sequence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) if (!ctx->commit_lsn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) * It is still being pushed! Wait for the push to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) * complete, then start again from the beginning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (ctx->sequence != sequence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) /* found it! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) commit_lsn = ctx->commit_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * The call to xlog_cil_push_now() executes the push in the background.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * Hence by the time we have got here our sequence may not yet have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * pushed. This is true if the current sequence still matches the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * push sequence after the above wait loop and the CIL still contains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * dirty objects. This is guaranteed by the push code first adding the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * context to the committing list before emptying the CIL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * Hence if we don't find the context in the committing list and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * current sequence number is unchanged then the CIL contents are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * significant. If the CIL is empty, it means there was nothing to push
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) * and that means there is nothing to wait for. If the CIL is not empty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) * it means we haven't yet started the push, because if it had started
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) * we would have found the context on the committing list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) if (sequence == cil->xc_current_sequence &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) !list_empty(&cil->xc_cil)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) return commit_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * We detected a shutdown in progress. We need to trigger the log force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * to pass through its iclog state machine error handling, even though
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * we are already in a shutdown state. Hence we can't return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * NULLCOMMITLSN here as that has special meaning to log forces (i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * LSN is already stable), so we return a zero LSN instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) out_shutdown:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) spin_unlock(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) }
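/*
 * Usage sketch, illustrative only: forcing everything currently in the
 * CIL is just a force of the current sequence, which is what the
 * xlog_cil_force() wrapper in xfs_log_priv.h boils down to.
 */
static inline xfs_lsn_t
xlog_cil_force_example(
	struct xlog	*log)
{
	return xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
}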
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * Check if the current log item was first committed in this sequence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * We can't rely on just the log item being in the CIL, we have to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) * the recorded commit sequence number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) * Note: for this to be used in a non-racy manner, it has to be called with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) * CIL flushing locked out. As a result, it should only be used during the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * transaction commit process when deciding what to format into the item.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) xfs_log_item_in_current_chkpt(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) struct xfs_log_item *lip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) struct xfs_cil_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) if (list_empty(&lip->li_cil))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) * li_seq is written on the first commit of a log item to record the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * first checkpoint it is written to. Hence if it is different to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * current sequence, we're in a new checkpoint.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
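/*
 * Usage sketch (hypothetical caller; names are illustrative): a
 * committer that has CIL flushing locked out can ask
 *
 *	if (xfs_log_item_in_current_chkpt(lip))
 *		return false;	(already covered by this checkpoint)
 *
 * to decide whether an item must be relogged into the current sequence.
 */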
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * Perform initial CIL structure initialisation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) xlog_cil_init(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) struct xfs_cil *cil;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) struct xfs_cil_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) if (!cil)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (!ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) kmem_free(cil);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) INIT_LIST_HEAD(&cil->xc_cil);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) INIT_LIST_HEAD(&cil->xc_committing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) spin_lock_init(&cil->xc_cil_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) spin_lock_init(&cil->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) init_waitqueue_head(&cil->xc_push_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) init_rwsem(&cil->xc_ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) init_waitqueue_head(&cil->xc_commit_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) INIT_LIST_HEAD(&ctx->committing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) INIT_LIST_HEAD(&ctx->busy_extents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) ctx->sequence = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) ctx->cil = cil;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) cil->xc_ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) cil->xc_current_sequence = ctx->sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) cil->xc_log = log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) log->l_cilp = cil;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
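/*
 * Pairing sketch (illustrative): xlog_cil_init() is called once during
 * log allocation and must be paired with xlog_cil_destroy() on
 * teardown, e.g.:
 *
 *	error = xlog_cil_init(log);
 *	if (error)
 *		goto out_free_log;
 *	...
 *	xlog_cil_destroy(log);
 */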
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) xlog_cil_destroy(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) if (log->l_cilp->xc_ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) if (log->l_cilp->xc_ctx->ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) kmem_free(log->l_cilp->xc_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) ASSERT(list_empty(&log->l_cilp->xc_cil));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) kmem_free(log->l_cilp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)