// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_trace.h"
#include "xfs_sysfs.h"
#include "xfs_sb.h"
#include "xfs_health.h"

kmem_zone_t	*xfs_log_ticket_zone;

/* Local miscellaneous function prototypes */
STATIC struct xlog *
xlog_alloc_log(
	struct xfs_mount	*mp,
	struct xfs_buftarg	*log_target,
	xfs_daddr_t		blk_offset,
	int			num_bblks);
STATIC int
xlog_space_left(
	struct xlog		*log,
	atomic64_t		*head);
STATIC void
xlog_dealloc_log(
	struct xlog		*log);

/* local state machine functions */
STATIC void xlog_state_done_syncing(
	struct xlog_in_core	*iclog);
STATIC int
xlog_state_get_iclog_space(
	struct xlog		*log,
	int			len,
	struct xlog_in_core	**iclog,
	struct xlog_ticket	*ticket,
	int			*continued_write,
	int			*logoffsetp);
STATIC void
xlog_state_switch_iclogs(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			eventual_size);
STATIC void
xlog_grant_push_ail(
	struct xlog		*log,
	int			need_bytes);
STATIC void
xlog_sync(
	struct xlog		*log,
	struct xlog_in_core	*iclog);
#if defined(DEBUG)
STATIC void
xlog_verify_dest_ptr(
	struct xlog		*log,
	void			*ptr);
STATIC void
xlog_verify_grant_tail(
	struct xlog		*log);
STATIC void
xlog_verify_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	int			count);
STATIC void
xlog_verify_tail_lsn(
	struct xlog		*log,
	struct xlog_in_core	*iclog,
	xfs_lsn_t		tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b,c)
#endif

STATIC int
xlog_iclogs_empty(
	struct xlog		*log);

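/*
 * The grant heads pack a 32-bit cycle number and a 32-bit byte count into a
 * single atomic 64-bit value so that both halves can be sampled and updated
 * together without taking a lock. xlog_grant_sub_space() retires @bytes of
 * reservation from @head, borrowing from the previous cycle if the byte count
 * underflows, and retries the cmpxchg until no other CPU has raced an update
 * in between.
 */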
static void
xlog_grant_sub_space(
	struct xlog		*log,
	atomic64_t		*head,
	int			bytes)
{
	int64_t	head_val = atomic64_read(head);
	int64_t	new, old;

	do {
		int	cycle, space;

		xlog_crack_grant_head_val(head_val, &cycle, &space);

		space -= bytes;
		if (space < 0) {
			space += log->l_logsize;
			cycle--;
		}

		old = head_val;
		new = xlog_assign_grant_head_val(cycle, space);
		head_val = atomic64_cmpxchg(head, old, new);
	} while (head_val != old);
}

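/*
 * Add @bytes of reservation to @head, wrapping into the next cycle when the
 * byte count would exceed the log size. For example, with l_logsize = 1000,
 * adding 300 bytes to (cycle 4, space 900) yields (cycle 5, space 200).
 */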
static void
xlog_grant_add_space(
	struct xlog		*log,
	atomic64_t		*head,
	int			bytes)
{
	int64_t	head_val = atomic64_read(head);
	int64_t	new, old;

	do {
		int	tmp;
		int	cycle, space;

		xlog_crack_grant_head_val(head_val, &cycle, &space);

		tmp = log->l_logsize - space;
		if (tmp > bytes)
			space += bytes;
		else {
			space = bytes - tmp;
			cycle++;
		}

		old = head_val;
		new = xlog_assign_grant_head_val(cycle, space);
		head_val = atomic64_cmpxchg(head, old, new);
	} while (head_val != old);
}

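/*
 * Initialise a grant head to the start of cycle 1 with an empty waiter queue.
 */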
STATIC void
xlog_grant_head_init(
	struct xlog_grant_head	*head)
{
	xlog_assign_grant_head(&head->grant, 1, 0);
	INIT_LIST_HEAD(&head->waiters);
	spin_lock_init(&head->lock);
}

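/*
 * Wake every waiter queued on this grant head. The tickets stay queued; each
 * woken task re-checks the condition it slept on (available space or log
 * shutdown) under head->lock.
 */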
STATIC void
xlog_grant_head_wake_all(
	struct xlog_grant_head	*head)
{
	struct xlog_ticket	*tic;

	spin_lock(&head->lock);
	list_for_each_entry(tic, &head->waiters, t_queue)
		wake_up_process(tic->t_task);
	spin_unlock(&head->lock);
}

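/*
 * Return the number of bytes this ticket needs from the given grant head.
 * The write head only ever regrants a single unit for permanent tickets,
 * while the reserve head takes the full unit_res * t_cnt up front for a
 * permanent reservation.
 */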
static inline int
xlog_ticket_reservation(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic)
{
	if (head == &log->l_write_head) {
		ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
		return tic->t_unit_res;
	} else {
		if (tic->t_flags & XLOG_TIC_PERM_RESERV)
			return tic->t_unit_res * tic->t_cnt;
		else
			return tic->t_unit_res;
	}
}

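/*
 * Wake as many queued waiters as the currently available space covers,
 * consuming *free_bytes as we go. Returns true if every waiter was woken,
 * false if we ran out of space first. Caller holds head->lock.
 */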
STATIC bool
xlog_grant_head_wake(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	int			*free_bytes)
{
	struct xlog_ticket	*tic;
	int			need_bytes;
	bool			woken_task = false;

	list_for_each_entry(tic, &head->waiters, t_queue) {

		/*
		 * There is a chance that the size of the CIL checkpoints in
		 * progress at the last AIL push target calculation resulted in
		 * limiting the target to the log head (l_last_sync_lsn) at the
		 * time. This may not reflect where the log head is now as the
		 * CIL checkpoints may have completed.
		 *
		 * Hence when we are woken here, it may be the head of the log
		 * that has moved rather than the tail. As the tail didn't
		 * move, there still won't be space available for the
		 * reservation we require. However, if the AIL has already
		 * pushed to the target defined by the old log head location,
		 * we will hang here waiting for something else to update the
		 * AIL push target.
		 *
		 * Therefore, if there isn't space to wake the first waiter on
		 * the grant head, we need to push the AIL again to ensure the
		 * target reflects both the current log tail and log head
		 * position before we wait for the tail to move again.
		 */

		need_bytes = xlog_ticket_reservation(log, head, tic);
		if (*free_bytes < need_bytes) {
			if (!woken_task)
				xlog_grant_push_ail(log, need_bytes);
			return false;
		}

		*free_bytes -= need_bytes;
		trace_xfs_log_grant_wake_up(log, tic);
		wake_up_process(tic->t_task);
		woken_task = true;
	}

	return true;
}

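/*
 * Queue the ticket on the grant head and sleep until enough space becomes
 * available or the log is shut down. Called and returns with head->lock held;
 * the lock is dropped across each schedule().
 */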
STATIC int
xlog_grant_head_wait(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic,
	int			need_bytes) __releases(&head->lock)
					    __acquires(&head->lock)
{
	list_add_tail(&tic->t_queue, &head->waiters);

	do {
		if (XLOG_FORCED_SHUTDOWN(log))
			goto shutdown;
		xlog_grant_push_ail(log, need_bytes);

		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(&head->lock);

		XFS_STATS_INC(log->l_mp, xs_sleep_logspace);

		trace_xfs_log_grant_sleep(log, tic);
		schedule();
		trace_xfs_log_grant_wake(log, tic);

		spin_lock(&head->lock);
		if (XLOG_FORCED_SHUTDOWN(log))
			goto shutdown;
	} while (xlog_space_left(log, &head->grant) < need_bytes);

	list_del_init(&tic->t_queue);
	return 0;
shutdown:
	list_del_init(&tic->t_queue);
	return -EIO;
}

/*
 * Atomically get the log space required for a log ticket.
 *
 * Once a ticket gets put onto head->waiters, this function will not return
 * until the needed reservation is satisfied.
 *
 * This function is structured so that it has a lock-free fast path. This is
 * necessary because every new transaction reservation will come through this
 * path. Hence any lock will be globally hot if we take it unconditionally on
 * every pass.
 *
 * As tickets are only ever moved on and off head->waiters under head->lock, we
 * only need to take that lock if we are going to add the ticket to the queue
 * and sleep. We can avoid taking the lock if the ticket was never added to
 * head->waiters because the t_queue list head will be empty and we hold the
 * only reference to it so it can safely be checked unlocked.
 */
STATIC int
xlog_grant_head_check(
	struct xlog		*log,
	struct xlog_grant_head	*head,
	struct xlog_ticket	*tic,
	int			*need_bytes)
{
	int			free_bytes;
	int			error = 0;

	ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));

	/*
	 * If there are other waiters on the queue then give them a chance at
	 * logspace before us. Wake up the first waiters; if we do not wake up
	 * all the waiters then go to sleep waiting for more free space,
	 * otherwise try to get some space for this transaction.
	 */
	*need_bytes = xlog_ticket_reservation(log, head, tic);
	free_bytes = xlog_space_left(log, &head->grant);
	if (!list_empty_careful(&head->waiters)) {
		spin_lock(&head->lock);
		if (!xlog_grant_head_wake(log, head, &free_bytes) ||
		    free_bytes < *need_bytes) {
			error = xlog_grant_head_wait(log, head, tic,
						     *need_bytes);
		}
		spin_unlock(&head->lock);
	} else if (free_bytes < *need_bytes) {
		spin_lock(&head->lock);
		error = xlog_grant_head_wait(log, head, tic, *need_bytes);
		spin_unlock(&head->lock);
	}

	return error;
}

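/*
 * Reset the per-ticket accounting of regions written under this reservation.
 */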
static void
xlog_tic_reset_res(xlog_ticket_t *tic)
{
	tic->t_res_num = 0;
	tic->t_res_arr_sum = 0;
	tic->t_res_num_ophdrs = 0;
}

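/*
 * Record a region of @len bytes of type @type in the ticket's reservation
 * accounting. The fixed-size array only tracks the most recent
 * XLOG_TIC_LEN_MAX regions; older entries are folded into the overflow sum.
 */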
static void
xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
{
	if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
		/* add to overflow and start again */
		tic->t_res_o_flow += tic->t_res_arr_sum;
		tic->t_res_num = 0;
		tic->t_res_arr_sum = 0;
	}

	tic->t_res_arr[tic->t_res_num].r_len = len;
	tic->t_res_arr[tic->t_res_num].r_type = type;
	tic->t_res_arr_sum += len;
	tic->t_res_num++;
}

/*
 * Replenish the byte reservation required by moving the grant write head.
 */
int
xfs_log_regrant(
	struct xfs_mount	*mp,
	struct xlog_ticket	*tic)
{
	struct xlog		*log = mp->m_log;
	int			need_bytes;
	int			error = 0;

	if (XLOG_FORCED_SHUTDOWN(log))
		return -EIO;

	XFS_STATS_INC(mp, xs_try_logspace);

	/*
	 * This is a new transaction on the ticket, so we need to change the
	 * transaction ID so that the next transaction has a different TID in
	 * the log. Just add one to the existing tid so that we can see chains
	 * of rolling transactions in the log easily.
	 */
	tic->t_tid++;

	xlog_grant_push_ail(log, tic->t_unit_res);

	tic->t_curr_res = tic->t_unit_res;
	xlog_tic_reset_res(tic);

	if (tic->t_cnt > 0)
		return 0;

	trace_xfs_log_regrant(log, tic);

	error = xlog_grant_head_check(log, &log->l_write_head, tic,
				      &need_bytes);
	if (error)
		goto out_error;

	xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
	trace_xfs_log_regrant_exit(log, tic);
	xlog_verify_grant_tail(log);
	return 0;

out_error:
	/*
	 * If we are failing, make sure the ticket doesn't have any current
	 * reservations. We don't want to add this back when the ticket/
	 * transaction gets cancelled.
	 */
	tic->t_curr_res = 0;
	tic->t_cnt = 0;	/* ungrant will give back unit_res * t_cnt. */
	return error;
}

/*
 * Reserve log space and return a ticket corresponding to the reservation.
 *
 * Each reservation is going to reserve extra space for a log record header.
 * When writes happen to the on-disk log, we don't subtract the length of the
 * log record header from any reservation. By wasting space in each
 * reservation, we prevent over-allocation problems.
 */
int
xfs_log_reserve(
	struct xfs_mount	*mp,
	int			unit_bytes,
	int			cnt,
	struct xlog_ticket	**ticp,
	uint8_t			client,
	bool			permanent)
{
	struct xlog		*log = mp->m_log;
	struct xlog_ticket	*tic;
	int			need_bytes;
	int			error = 0;

	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);

	if (XLOG_FORCED_SHUTDOWN(log))
		return -EIO;

	XFS_STATS_INC(mp, xs_try_logspace);

	ASSERT(*ticp == NULL);
	tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent);
	*ticp = tic;

	xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
					    : tic->t_unit_res);

	trace_xfs_log_reserve(log, tic);

	error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
				      &need_bytes);
	if (error)
		goto out_error;

	xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
	xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
	trace_xfs_log_reserve_exit(log, tic);
	xlog_verify_grant_tail(log);
	return 0;

out_error:
	/*
	 * If we are failing, make sure the ticket doesn't have any current
	 * reservations. We don't want to add this back when the ticket/
	 * transaction gets cancelled.
	 */
	tic->t_curr_res = 0;
	tic->t_cnt = 0;	/* ungrant will give back unit_res * t_cnt. */
	return error;
}

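/*
 * Perform the state transition for an iclog whose last reference has been
 * dropped: move a WANT_SYNC iclog to SYNCING and stamp the current log tail
 * LSN into its header. Returns true if the caller should now submit the iclog
 * to disk via xlog_sync(). Caller holds l_icloglock.
 */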
static bool
__xlog_state_release_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog)
{
	lockdep_assert_held(&log->l_icloglock);

	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
		/* update tail before writing to iclog */
		xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);

		iclog->ic_state = XLOG_STATE_SYNCING;
		iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
		xlog_verify_tail_lsn(log, iclog, tail_lsn);
		/* cycle incremented when incrementing curr_block */
		return true;
	}

	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
	return false;
}

/*
 * Flush iclog to disk if this is the last reference to the given iclog and it
 * is in the WANT_SYNC state.
 */
static int
xlog_state_release_iclog(
	struct xlog		*log,
	struct xlog_in_core	*iclog)
{
	lockdep_assert_held(&log->l_icloglock);

	if (iclog->ic_state == XLOG_STATE_IOERROR)
		return -EIO;

	if (atomic_dec_and_test(&iclog->ic_refcnt) &&
	    __xlog_state_release_iclog(log, iclog)) {
		spin_unlock(&log->l_icloglock);
		xlog_sync(log, iclog);
		spin_lock(&log->l_icloglock);
	}

	return 0;
}

void
xfs_log_release_iclog(
	struct xlog_in_core	*iclog)
{
	struct xlog		*log = iclog->ic_log;
	bool			sync = false;

	if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) {
		if (iclog->ic_state != XLOG_STATE_IOERROR)
			sync = __xlog_state_release_iclog(log, iclog);
		spin_unlock(&log->l_icloglock);
	}

	if (sync)
		xlog_sync(log, iclog);
}

/*
 * Mount a log filesystem
 *
 * mp		- ubiquitous xfs mount point structure
 * log_target	- buftarg of on-disk log device
 * blk_offset	- Start block # where block size is 512 bytes (BBSIZE)
 * num_bblks	- Number of BBSIZE blocks in on-disk log
 *
 * Return error or zero.
 */
int
xfs_log_mount(
	xfs_mount_t	*mp,
	xfs_buftarg_t	*log_target,
	xfs_daddr_t	blk_offset,
	int		num_bblks)
{
	bool		fatal = xfs_sb_version_hascrc(&mp->m_sb);
	int		error = 0;
	int		min_logfsbs;

	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
		xfs_notice(mp, "Mounting V%d Filesystem",
			   XFS_SB_VERSION_NUM(&mp->m_sb));
	} else {
		xfs_notice(mp,
"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
			   XFS_SB_VERSION_NUM(&mp->m_sb));
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
	}

	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
	if (IS_ERR(mp->m_log)) {
		error = PTR_ERR(mp->m_log);
		goto out;
	}

	/*
	 * Validate the given log space and drop a critical message via syslog
	 * if the log size is too small, which would lead to some unexpected
	 * situations during the transaction log space reservation stage.
	 *
	 * Note: we can't just reject the mount if the validation fails. This
	 * would mean that people would have to downgrade their kernel just to
	 * remedy the situation as there is no way to grow the log (short of
	 * black magic surgery with xfs_db).
	 *
	 * We can, however, reject mounts for CRC format filesystems, as the
	 * mkfs binary being used to make the filesystem should never create a
	 * filesystem with a log that is too small.
	 */
	min_logfsbs = xfs_log_calc_minimum_size(mp);

	if (mp->m_sb.sb_logblocks < min_logfsbs) {
		xfs_warn(mp,
		"Log size %d blocks too small, minimum size is %d blocks",
			 mp->m_sb.sb_logblocks, min_logfsbs);
		error = -EINVAL;
	} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
		xfs_warn(mp,
		"Log size %d blocks too large, maximum size is %lld blocks",
			 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
		error = -EINVAL;
	} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
		xfs_warn(mp,
		"log size %lld bytes too large, maximum size is %lld bytes",
			 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
			 XFS_MAX_LOG_BYTES);
		error = -EINVAL;
	} else if (mp->m_sb.sb_logsunit > 1 &&
		   mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
		xfs_warn(mp,
		"log stripe unit %u bytes must be a multiple of block size",
			 mp->m_sb.sb_logsunit);
		error = -EINVAL;
		fatal = true;
	}
	if (error) {
		/*
		 * Log check errors are always fatal on v5; or whenever bad
		 * metadata leads to a crash.
		 */
		if (fatal) {
			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
			ASSERT(0);
			goto out_free_log;
		}
		xfs_crit(mp, "Log size out of supported range.");
		xfs_crit(mp,
"Continuing onwards, but if log hangs are experienced then please report this message in the bug report.");
	}

	/*
	 * Initialize the AIL now we have a log.
	 */
	error = xfs_trans_ail_init(mp);
	if (error) {
		xfs_warn(mp, "AIL initialisation failed: error %d", error);
		goto out_free_log;
	}
	mp->m_log->l_ailp = mp->m_ail;

	/*
	 * Skip log recovery on a norecovery mount. Pretend it all
	 * just worked.
	 */
	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
		int	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);

		if (readonly)
			mp->m_flags &= ~XFS_MOUNT_RDONLY;

		error = xlog_recover(mp->m_log);

		if (readonly)
			mp->m_flags |= XFS_MOUNT_RDONLY;
		if (error) {
			xfs_warn(mp, "log mount/recovery failed: error %d",
				 error);
			xlog_recover_cancel(mp->m_log);
			goto out_destroy_ail;
		}
	}

	error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
			       "log");
	if (error)
		goto out_destroy_ail;

	/* Normal transactions can now occur */
	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;

	/*
	 * Now that the log has been fully initialised and we know where our
	 * space grant counters are, we can initialise the permanent ticket
	 * needed for delayed logging to work.
	 */
	xlog_cil_init_post_recovery(mp->m_log);

	return 0;

out_destroy_ail:
	xfs_trans_ail_destroy(mp);
out_free_log:
	xlog_dealloc_log(mp->m_log);
out:
	return error;
}

/*
 * Finish the recovery of the file system. This is separate from the
 * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read
 * in the root and real-time bitmap inodes between calling xfs_log_mount() and
 * here.
 *
 * If we finish recovery successfully, start the background log work. If we are
 * not doing recovery, then we have a RO filesystem and we don't need to start
 * it.
 */
int
xfs_log_mount_finish(
	struct xfs_mount	*mp)
{
	int			error = 0;
	bool			readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
	bool			recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED;

	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
		return 0;
	} else if (readonly) {
		/* Allow unlinked processing to proceed */
		mp->m_flags &= ~XFS_MOUNT_RDONLY;
	}

	/*
	 * During the second phase of log recovery, we need iget and
	 * iput to behave like they do for an active filesystem.
	 * xfs_fs_drop_inode needs to be able to prevent the deletion
	 * of inodes before we're done replaying log items on those
	 * inodes. Turn it off immediately after recovery finishes
	 * so that we don't leak the quota inodes if subsequent mount
	 * activities fail.
	 *
	 * We let all inodes involved in redo item processing end up on
	 * the LRU instead of being evicted immediately so that if we do
	 * something to an unlinked inode, the irele won't cause
	 * premature truncation and freeing of the inode, which results
	 * in log recovery failure. We have to evict the unreferenced
	 * lru inodes after clearing SB_ACTIVE because we don't
	 * otherwise clean up the lru if there's a subsequent failure in
	 * xfs_mountfs, which leads to us leaking the inodes if nothing
	 * else (e.g. quotacheck) references the inodes before the
	 * mount failure occurs.
	 */
	mp->m_super->s_flags |= SB_ACTIVE;
	error = xlog_recover_finish(mp->m_log);
	if (!error)
		xfs_log_work_queue(mp);
	mp->m_super->s_flags &= ~SB_ACTIVE;
	evict_inodes(mp->m_super);

	/*
	 * Drain the buffer LRU after log recovery. This is required for v4
	 * filesystems to avoid leaving around buffers with NULL verifier ops,
	 * but we do it unconditionally to make sure we're always in a clean
	 * cache state after mount.
	 *
	 * Don't push in the error case because the AIL may have pending intents
	 * that aren't removed until recovery is cancelled.
	 */
	if (!error && recovered) {
		xfs_log_force(mp, XFS_LOG_SYNC);
		xfs_ail_push_all_sync(mp->m_ail);
	}
	xfs_wait_buftarg(mp->m_ddev_targp);

	if (readonly)
		mp->m_flags |= XFS_MOUNT_RDONLY;

	return error;
}

/*
 * The mount has failed. Cancel the recovery if it hasn't completed and destroy
 * the log.
 */
void
xfs_log_mount_cancel(
	struct xfs_mount	*mp)
{
	xlog_recover_cancel(mp->m_log);
	xfs_log_unmount(mp);
}

/*
 * Wait for the iclog to be written to disk, or return an error if the log has
 * been shut down.
 */
static int
xlog_wait_on_iclog(
	struct xlog_in_core	*iclog)
		__releases(iclog->ic_log->l_icloglock)
{
	struct xlog		*log = iclog->ic_log;

	if (!XLOG_FORCED_SHUTDOWN(log) &&
	    iclog->ic_state != XLOG_STATE_ACTIVE &&
	    iclog->ic_state != XLOG_STATE_DIRTY) {
		XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
		xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
	} else {
		spin_unlock(&log->l_icloglock);
	}

	if (XLOG_FORCED_SHUTDOWN(log))
		return -EIO;
	return 0;
}

/*
 * Write out an unmount record using the ticket provided. We have to account for
 * the data space used in the unmount ticket as this write is not done from a
 * transaction context that has already done the accounting for us.
 */
static int
xlog_write_unmount_record(
	struct xlog		*log,
	struct xlog_ticket	*ticket,
	xfs_lsn_t		*lsn,
	uint			flags)
{
	struct xfs_unmount_log_format ulf = {
		.magic = XLOG_UNMOUNT_TYPE,
	};
	struct xfs_log_iovec reg = {
		.i_addr = &ulf,
		.i_len = sizeof(ulf),
		.i_type = XLOG_REG_TYPE_UNMOUNT,
	};
	struct xfs_log_vec vec = {
		.lv_niovecs = 1,
		.lv_iovecp = &reg,
	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) /* account for space used by record data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) ticket->t_curr_res -= sizeof(ulf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) return xlog_write(log, &vec, ticket, lsn, NULL, flags, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) * Mark the filesystem clean by writing an unmount record to the head of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) * log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) xlog_unmount_write(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) struct xfs_mount *mp = log->l_mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) struct xlog_in_core *iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) struct xlog_ticket *tic = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) xfs_lsn_t lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) uint flags = XLOG_UNMOUNT_TRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
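/*
 * The unmount record is tiny, so this token 600-byte reservation is
 * presumably ample headroom for the record and its log headers.
 */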
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) error = xlog_write_unmount_record(log, tic, &lsn, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) * At this point, we're unmounting anyway, so there's no point in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * transitioning log state to IOERROR. Just continue...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) xfs_alert(mp, "%s: unmount record failed", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) atomic_inc(&iclog->ic_refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) if (iclog->ic_state == XLOG_STATE_ACTIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) xlog_state_switch_iclogs(log, iclog, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) iclog->ic_state == XLOG_STATE_IOERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) error = xlog_state_release_iclog(log, iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) xlog_wait_on_iclog(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) if (tic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) trace_xfs_log_umount_write(log, tic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) xfs_log_ticket_ungrant(log, tic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) xfs_log_unmount_verify_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) struct xlog_in_core *iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
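/* walk the iclog ring exactly once; every iclog must be clean at unmount */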
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) ASSERT(iclog->ic_offset == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) } while ((iclog = iclog->ic_next) != log->l_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) * The unmount record used to have a string "Unmount filesystem--" in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) * We now just write the magic number since that particular field isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * currently architecture converted and "Unmount" is a bit of a misnomer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * As far as I know, there weren't any dependencies on the old behaviour.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) xfs_log_unmount_write(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) * Don't write out unmount record on norecovery mounts or ro devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) * Or, if we are doing a forced umount (typically because of IO errors).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) xfs_readonly_buftarg(log->l_targ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) xfs_log_force(mp, XFS_LOG_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) if (XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) * If we think the summary counters are bad, avoid writing the unmount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) * record to force log recovery at next mount, after which the summary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * counters will be recalculated. Refer to xlog_check_unmount_rec for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) xfs_alert(mp, "%s: will fix summary counters at next mount",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) xfs_log_unmount_verify_iclog(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) xlog_unmount_write(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * Empty the log for unmount/freeze.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) * To do this, we first need to shut down the background log work so it is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * trying to cover the log as we clean up. We then need to unpin all objects in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * the log so we can then flush them out. Once they have completed their IO and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * run the callbacks removing themselves from the AIL, we can write the unmount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * record.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) xfs_log_quiesce(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) cancel_delayed_work_sync(&mp->m_log->l_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) xfs_log_force(mp, XFS_LOG_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) * The superblock buffer is uncached and while xfs_ail_push_all_sync()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) * will push it, xfs_wait_buftarg() will not wait for it. Further,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) * xfs_buf_iowait() cannot be used because it was pushed with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) * the IO to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) xfs_ail_push_all_sync(mp->m_ail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) xfs_wait_buftarg(mp->m_ddev_targp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) xfs_buf_lock(mp->m_sb_bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) xfs_buf_unlock(mp->m_sb_bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) xfs_log_unmount_write(mp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) * Shut down and release the AIL and Log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * During unmount, we need to ensure we flush all the dirty metadata objects
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * from the AIL so that the log is empty before we write the unmount record to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * the log. Once this is done, we can tear down the AIL and the log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) xfs_log_unmount(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) xfs_log_quiesce(mp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) xfs_trans_ail_destroy(mp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) xfs_sysfs_del(&mp->m_log->l_kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) xlog_dealloc_log(mp->m_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) xfs_log_item_init(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) struct xfs_log_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) const struct xfs_item_ops *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) item->li_mountp = mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) item->li_ailp = mp->m_ail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) item->li_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) item->li_ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) item->li_lv = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) INIT_LIST_HEAD(&item->li_ail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) INIT_LIST_HEAD(&item->li_cil);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) INIT_LIST_HEAD(&item->li_bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) INIT_LIST_HEAD(&item->li_trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) * Wake up processes waiting for log space after we have moved the log tail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) xfs_log_space_wake(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) int free_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
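/*
 * list_empty_careful() is a lockless check, so the grant head locks are
 * only taken when there may actually be waiters to wake.
 */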
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) if (!list_empty_careful(&log->l_write_head.waiters)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) spin_lock(&log->l_write_head.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) free_bytes = xlog_space_left(log, &log->l_write_head.grant);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) spin_unlock(&log->l_write_head.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) if (!list_empty_careful(&log->l_reserve_head.waiters)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) spin_lock(&log->l_reserve_head.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) spin_unlock(&log->l_reserve_head.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * Determine if we have a transaction that has gone to disk that needs to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * covered. To begin the transition to the idle state, the log first needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * be idle. That means the CIL, the AIL and the iclogs need to be empty before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * we start attempting to cover the log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * Only if we are then in a state where covering is needed is the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * informed that dummy transactions are required to move the log into the idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * If there are any items in the AIL or CIL, then we do not want to attempt to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * cover the log as we may be in a situation where there isn't log space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * available to run a dummy transaction and this can lead to deadlocks when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * tail of the log is pinned by an item that is modified in the CIL. Hence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * there's no point in running a dummy transaction at this point because we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * can't start trying to idle the log until both the CIL and AIL are empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) xfs_log_need_covered(struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) int needed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) if (!xlog_cil_empty(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) switch (log->l_covered_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) case XLOG_STATE_COVER_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) case XLOG_STATE_COVER_DONE2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) case XLOG_STATE_COVER_IDLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) case XLOG_STATE_COVER_NEED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) case XLOG_STATE_COVER_NEED2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) if (xfs_ail_min_lsn(log->l_ailp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) if (!xlog_iclogs_empty(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) needed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (log->l_covered_state == XLOG_STATE_COVER_NEED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) log->l_covered_state = XLOG_STATE_COVER_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) log->l_covered_state = XLOG_STATE_COVER_DONE2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) needed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) return needed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * We may be holding the log iclog lock upon entering this routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) xfs_lsn_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) xlog_assign_tail_lsn_locked(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) struct xfs_log_item *lip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) xfs_lsn_t tail_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) assert_spin_locked(&mp->m_ail->ail_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * To make sure we always have a valid LSN for the log tail we keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * track of the last LSN which was committed in log->l_last_sync_lsn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * and use that when the AIL is empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) lip = xfs_ail_min(mp->m_ail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) if (lip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) tail_lsn = lip->li_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) tail_lsn = atomic64_read(&log->l_last_sync_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) trace_xfs_log_assign_tail_lsn(log, tail_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) atomic64_set(&log->l_tail_lsn, tail_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) return tail_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) xfs_lsn_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) xlog_assign_tail_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) xfs_lsn_t tail_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) spin_lock(&mp->m_ail->ail_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) tail_lsn = xlog_assign_tail_lsn_locked(mp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) spin_unlock(&mp->m_ail->ail_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) return tail_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) * Return the space in the log between the tail and the head. The head
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) * is passed in via the combined cycle/bytes grant head. In the special case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) * where the reserve head has wrapped past the tail, this calculation is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) * longer valid. In this case, just return 0, which means there is no space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) * in the log. This works for all places where this function is called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) * with the reserve head. Of course, if the write head were to ever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * wrap the tail, we should blow up. Rather than catch this case here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * we depend on ASSERTs in other parts of the code. XXXmiken
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * This code also handles the case where the reservation head is behind
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * the tail. The details of this case are described below, but the end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * result is that we return the size of the log as the amount of space left.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) xlog_space_left(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) atomic64_t *head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) int free_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) int tail_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) int tail_cycle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) int head_cycle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) int head_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) xlog_crack_grant_head(head, &head_cycle, &head_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) tail_bytes = BBTOB(tail_bytes);
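/*
 * Normally the head is in the same cycle as the tail, or exactly one
 * cycle ahead of it after wrapping.  A head more than one cycle ahead
 * means the log is full, so report no space; anything else means the
 * grant head has somehow fallen behind the tail.
 */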
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) free_bytes = log->l_logsize - (head_bytes - tail_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) else if (tail_cycle + 1 < head_cycle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) else if (tail_cycle < head_cycle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) ASSERT(tail_cycle == (head_cycle - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) free_bytes = tail_bytes - head_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) * The reservation head is behind the tail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * In this case we just want to return the size of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) * log as the amount of space left.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) xfs_alert(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) " tail_cycle = %d, tail_bytes = %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) tail_cycle, tail_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) xfs_alert(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) " GH cycle = %d, GH bytes = %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) head_cycle, head_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) ASSERT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) free_bytes = log->l_logsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) return free_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) xlog_ioend_work(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) struct xlog_in_core *iclog =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) container_of(work, struct xlog_in_core, ic_end_io_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) struct xlog *log = iclog->ic_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) error = blk_status_to_errno(iclog->ic_bio.bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) /* treat writes with injected CRC errors as failed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) if (iclog->ic_fail_crc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) error = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * Race to shutdown the filesystem if we see an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) xfs_alert(log->l_mp, "log I/O error %d", error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) xlog_state_done_syncing(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) bio_uninit(&iclog->ic_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * Drop the lock to signal that we are done. Nothing references the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * iclog after this, so an unmount waiting on this lock can now tear it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * down safely. As such, it is unsafe to reference the iclog after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * unlock as we could race with it being freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) up(&iclog->ic_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * Return size of each in-core log record buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) * If the filesystem blocksize is too large, we may need to choose a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) * larger size since the directory code currently logs entire blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) xlog_get_iclog_buffer_size(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) if (mp->m_logbufs <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) mp->m_logbufs = XLOG_MAX_ICLOGS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) if (mp->m_logbsize <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) log->l_iclog_bufs = mp->m_logbufs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) log->l_iclog_size = mp->m_logbsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) * # headers = size / 32k - one header holds cycles from 32k of data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) log->l_iclog_heads =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
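/* e.g. the default 32k buffers need a single header: 1 << BBSHIFT = 512 bytes */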
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) xfs_log_work_queue(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) {
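/* xfs_syncd_centisecs is in units of 10ms, hence the factor of 10 to get msecs */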
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) msecs_to_jiffies(xfs_syncd_centisecs * 10));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) * Every sync period we need to unpin all items in the AIL and push them to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) * disk. If there is nothing dirty, then we might need to cover the log to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) * indicate that the filesystem is idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) xfs_log_worker(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) struct xlog *log = container_of(to_delayed_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) struct xlog, l_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) struct xfs_mount *mp = log->l_mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) /* dgc: errors ignored - not fatal and nowhere to report them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) if (xfs_log_need_covered(mp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * Dump a transaction into the log that contains no real change.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * This is needed to stamp the current tail LSN into the log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * during the covering operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) * We cannot use an inode here for this - that will push dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) * state back up into the VFS and then periodic inode flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) * will prevent log covering from making progress. Hence we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) * synchronously log the superblock instead to ensure the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) * superblock is immediately unpinned and can be written back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) xfs_sync_sb(mp, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) xfs_log_force(mp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) /* start pushing all the metadata that is currently dirty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) xfs_ail_push_all(mp->m_ail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) /* queue us up again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) xfs_log_work_queue(mp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) * This routine initializes some of the log structure for a given mount point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) * Its primary purpose is to fill in enough so that recovery can occur.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) * However, some other fields may be filled in too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) STATIC struct xlog *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) xlog_alloc_log(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) struct xfs_buftarg *log_target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) xfs_daddr_t blk_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) int num_bblks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) struct xlog *log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) xlog_rec_header_t *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) xlog_in_core_t **iclogp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) xlog_in_core_t *iclog, *prev_iclog=NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) int error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) uint log2_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) if (!log) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) xfs_warn(mp, "Log allocation failed: No memory!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) log->l_mp = mp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) log->l_targ = log_target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) log->l_logsize = BBTOB(num_bblks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) log->l_logBBstart = blk_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) log->l_logBBsize = num_bblks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) log->l_covered_state = XLOG_STATE_COVER_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) log->l_flags |= XLOG_ACTIVE_RECOVERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) log->l_prev_block = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) xlog_grant_head_init(&log->l_reserve_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) xlog_grant_head_init(&log->l_write_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) error = -EFSCORRUPTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) if (xfs_sb_version_hassector(&mp->m_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) log2_size = mp->m_sb.sb_logsectlog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) if (log2_size < BBSHIFT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) log2_size, BBSHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) goto out_free_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) log2_size -= BBSHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) if (log2_size > mp->m_sectbb_log) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) log2_size, mp->m_sectbb_log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) goto out_free_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) /* for larger sector sizes, must have v2 or external log */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (log2_size && log->l_logBBstart > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) !xfs_sb_version_haslogv2(&mp->m_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) xfs_warn(mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) "log sector size (0x%x) invalid for configuration.",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) log2_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) goto out_free_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) }
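/* the log sector size expressed in 512-byte basic blocks */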
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) log->l_sectBBsize = 1 << log2_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) xlog_get_iclog_buffer_size(mp, log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) spin_lock_init(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) init_waitqueue_head(&log->l_flush_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) iclogp = &log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) * The amount of memory to allocate for the iclog structure is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) * rather funky due to the way the structure is defined. It is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) * done this way so that we can use different sizes for machines
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) * with different amounts of memory. See the definition of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) * xlog_in_core_t in xfs_log_priv.h for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) ASSERT(log->l_iclog_size >= 4096);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) for (i = 0; i < log->l_iclog_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) sizeof(struct bio_vec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) if (!iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) goto out_free_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) *iclogp = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) iclog->ic_prev = prev_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) prev_iclog = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) KM_MAYFAIL | KM_ZERO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) if (!iclog->ic_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) goto out_free_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) log->l_iclog_bak[i] = &iclog->ic_header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) head = &iclog->ic_header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) memset(head, 0, sizeof(xlog_rec_header_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) head->h_version = cpu_to_be32(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) head->h_size = cpu_to_be32(log->l_iclog_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) /* new fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) head->h_fmt = cpu_to_be32(XLOG_FMT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) iclog->ic_state = XLOG_STATE_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) iclog->ic_log = log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) atomic_set(&iclog->ic_refcnt, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) spin_lock_init(&iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) INIT_LIST_HEAD(&iclog->ic_callbacks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) init_waitqueue_head(&iclog->ic_force_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) init_waitqueue_head(&iclog->ic_write_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) sema_init(&iclog->ic_sema, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) iclogp = &iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) *iclogp = log->l_iclog; /* complete ring */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) mp->m_super->s_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) if (!log->l_ioend_workqueue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) goto out_free_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) error = xlog_cil_init(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) goto out_destroy_workqueue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) return log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) out_destroy_workqueue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) destroy_workqueue(log->l_ioend_workqueue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) out_free_iclog:
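/*
 * The ring may only be partially built: a complete ring wraps back to
 * the head, while a partial list simply ends at a NULL ic_next.
 */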
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) prev_iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) kmem_free(iclog->ic_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) kmem_free(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) if (prev_iclog == log->l_iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) out_free_log:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) kmem_free(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) return ERR_PTR(error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) } /* xlog_alloc_log */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * Write out the commit record of a transaction associated with the given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) * ticket to close off a running log write. Return the lsn of the commit record.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) xlog_commit_record(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) struct xlog_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) struct xlog_in_core **iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) xfs_lsn_t *lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) {
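/*
 * The commit record carries no payload; the zero-length
 * XLOG_REG_TYPE_COMMIT region merely marks the transaction as
 * committed in the log.
 */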
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) struct xfs_log_iovec reg = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) .i_addr = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) .i_len = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) .i_type = XLOG_REG_TYPE_COMMIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) struct xfs_log_vec vec = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) .lv_niovecs = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) .lv_iovecp = &reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) if (XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * Compute the LSN that we'd need to push the log tail towards in order to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * (a) enough on-disk log space to log the number of bytes specified, (b) at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) * least 25% of the log space free, and (c) at least 256 blocks free. If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) * log free space already meets all three thresholds, this function returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) * NULLCOMMITLSN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) xfs_lsn_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) xlog_grant_push_threshold(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) int need_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) xfs_lsn_t threshold_lsn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) xfs_lsn_t last_sync_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) int free_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) int free_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) int threshold_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) int threshold_cycle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) int free_threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) free_blocks = BTOBBT(free_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * Set the threshold for the minimum number of free blocks in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) * log to the maximum of what the caller needs, one quarter of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) * log, and 256 blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) free_threshold = BTOBB(need_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) free_threshold = max(free_threshold, 256);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) if (free_blocks >= free_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) return NULLCOMMITLSN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) &threshold_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) threshold_block += free_threshold;
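/* wrap the threshold into the next cycle if it runs off the end of the log */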
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) if (threshold_block >= log->l_logBBsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) threshold_block -= log->l_logBBsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) threshold_cycle += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) threshold_lsn = xlog_assign_lsn(threshold_cycle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) threshold_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) * Don't pass in an lsn greater than the lsn of the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) * log record known to be on disk. Use a snapshot of the last sync lsn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) * so that it doesn't change between the compare and the set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) threshold_lsn = last_sync_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) return threshold_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * Push the tail of the log if we need to do so to maintain the free log space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * thresholds set out by xlog_grant_push_threshold. We may need to adopt a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) * policy that pushes on an lsn that is further along in the log once we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * reach the high water mark. In this manner, we would be creating a low water
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) * mark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) xlog_grant_push_ail(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) int need_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) xfs_lsn_t threshold_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) * Get the transaction layer to kick the dirty buffers out to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * disk asynchronously. No point in trying to do this if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) * the filesystem is shutting down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) xfs_ail_push(log->l_ailp, threshold_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) * Stamp cycle number in every block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) xlog_pack_data(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) int roundoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) int i, j, k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) int size = iclog->ic_offset + roundoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) __be32 cycle_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) char *dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) dp = iclog->ic_datap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) for (i = 0; i < BTOBB(size); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) *(__be32 *)dp = cycle_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) dp += BBSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
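/*
 * Records larger than XLOG_HEADER_CYCLE_SIZE (32k) spill the rest of
 * their cycle data into the extended headers.
 */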
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) xlog_in_core_2_t *xhdr = iclog->ic_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) for ( ; i < BTOBB(size); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) *(__be32 *)dp = cycle_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) dp += BBSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) for (i = 1; i < log->l_iclog_heads; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
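
/*
 * A minimal sketch (hypothetical helper, not part of this file's build) of
 * the index math in the v2 loop above, assuming the usual 32k
 * XLOG_HEADER_CYCLE_SIZE and 512 byte BBSIZE: each extended header saves
 * the first word of 64 record blocks.
 */
#if 0
static void
xlog_cycle_word_slot(
	int		i,
	int		*hdr,	/* which extended header (j above) */
	int		*slot)	/* which xh_cycle_data[] word (k above) */
{
	int		blks_per_hdr = XLOG_HEADER_CYCLE_SIZE / BBSIZE; /* 64 */

	*hdr = i / blks_per_hdr;
	*slot = i % blks_per_hdr;
}
#endif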
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) * Calculate the checksum for a log buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) * This is a little more complicated than it should be because the various
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) * headers and the actual data are non-contiguous.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) __le32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) xlog_cksum(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) struct xlog_rec_header *rhead,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) char *dp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) uint32_t crc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) /* first generate the crc for the record header ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) crc = xfs_start_cksum_update((char *)rhead,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) sizeof(struct xlog_rec_header),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) offsetof(struct xlog_rec_header, h_crc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) /* ... then for additional cycle data for v2 logs ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) int xheads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) for (i = 1; i < xheads; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) crc = crc32c(crc, &xhdr[i].hic_xheader,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) sizeof(struct xlog_rec_ext_header));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) /* ... and finally for the payload */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) crc = crc32c(crc, dp, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) return xfs_end_cksum(crc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
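
/*
 * A sketch of the verification side, in the spirit of what log recovery
 * does (hypothetical caller, not the recovery code itself). Note that
 * xfs_start_cksum_update() zeroes h_crc while summing, so the stored
 * value must be saved before recomputing.
 */
#if 0
static bool
xlog_crc_matches(
	struct xlog		*log,
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			size)
{
	__le32			old_crc = rhead->h_crc;

	/* recompute over the header, extended headers and payload */
	return xlog_cksum(log, rhead, dp, size) == old_crc;
}
#endif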
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) xlog_bio_end_io(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) struct xlog_in_core *iclog = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) queue_work(iclog->ic_log->l_ioend_workqueue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) &iclog->ic_end_io_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) xlog_map_iclog_data(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) size_t count)
{
	/*
	 * Walk the buffer a page at a time: the iclog data area may be
	 * vmalloc'd, so resolve each chunk to its backing page before
	 * adding it to the bio.
	 */
	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) struct page *page = kmem_to_page(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) unsigned int off = offset_in_page(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) size_t len = min_t(size_t, count, PAGE_SIZE - off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) if (bio_add_page(bio, page, len, off) != len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) data += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) count -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) } while (count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) xlog_write_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) uint64_t bno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) unsigned int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) bool need_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) ASSERT(bno < log->l_logBBsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
	/*
	 * We lock the iclogbufs here so that we can serialise against I/O
	 * completion during unmount. We might be processing a shutdown
	 * triggered during unmount, and that can occur asynchronously to the
	 * unmount thread, so we need to ensure that it completes before
	 * tearing down the iclogbufs. Hence we hold the buffer lock across
	 * the log IO to achieve that.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) down(&iclog->ic_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) {
		/*
		 * It would seem logical to return EIO here, but we rely on
		 * the log state machine to propagate I/O errors instead of
		 * doing it here. We kick off the state machine and unlock
		 * the buffer manually; this code needs to be kept in sync
		 * with the I/O completion path.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) xlog_state_done_syncing(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) up(&iclog->ic_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) iclog->ic_bio.bi_end_io = xlog_bio_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) iclog->ic_bio.bi_private = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
	/*
	 * We use REQ_SYNC | REQ_IDLE here to tell the block layer that there
	 * are more IOs coming immediately after this one. This prevents the
	 * block layer writeback throttle from throttling log writes behind
	 * background metadata writeback and causing priority inversions.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) REQ_IDLE | REQ_FUA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) if (need_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) if (is_vmalloc_addr(iclog->ic_data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) flush_kernel_vmap_range(iclog->ic_data, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) * If this log buffer would straddle the end of the log we will have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) * to split it up into two bios, so that we can continue at the start.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) if (bno + BTOBB(count) > log->l_logBBsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) struct bio *split;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) GFP_NOIO, &fs_bio_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) bio_chain(split, &iclog->ic_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) submit_bio(split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) /* restart at logical offset zero for the remainder */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) submit_bio(&iclog->ic_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) }
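
/*
 * Worked example of the wrap handling above (assumed geometry):
 * l_logBBsize = 1024, bno = 1000, count = 64k (128 basic blocks).
 * bno + BTOBB(count) = 1128 > 1024, so bio_split() carves off the first
 * 24 blocks for the tail of the log (blocks 1000-1023) and the chained
 * remainder restarts at l_logBBstart to write the other 104 blocks.
 */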
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)
/*
 * We need to bump the cycle number for the part of the iclog that gets
 * written to the start of the log. Watch out for the header magic
 * number case, though: the stamped value must never collide with it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) xlog_split_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) uint64_t bno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) unsigned int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) unsigned int split_offset = BBTOB(log->l_logBBsize - bno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) for (i = split_offset; i < count; i += BBSIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) uint32_t cycle = get_unaligned_be32(data + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) if (++cycle == XLOG_HEADER_MAGIC_NUM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) cycle++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) put_unaligned_be32(cycle, data + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) }
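
/*
 * A minimal sketch (hypothetical helper, not compiled here) of the bump
 * above: blocks that wrap to the start of the log belong to the next
 * cycle, and the stamp must skip XLOG_HEADER_MAGIC_NUM so a data block
 * can never masquerade as a record header during recovery.
 */
#if 0
static uint32_t
xlog_bump_wrapped_cycle(
	uint32_t	cycle)
{
	if (++cycle == XLOG_HEADER_MAGIC_NUM)
		cycle++;
	return cycle;
}
#endif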
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) xlog_calc_iclog_size(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) uint32_t *roundoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) uint32_t count_init, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) bool use_lsunit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) log->l_mp->m_sb.sb_logsunit > 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) /* Add for LR header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) count_init = log->l_iclog_hsize + iclog->ic_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) /* Round out the log write size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) if (use_lsunit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) /* we have a v2 stripe unit to use */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) count = BBTOB(BTOBB(count_init));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) ASSERT(count >= count_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) *roundoff = count - count_init;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) if (use_lsunit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) ASSERT(*roundoff < BBTOB(1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) }
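
/*
 * Worked example (assumed numbers): count_init = 3512 bytes of header
 * plus data.  With a v2 log and sb_logsunit = 4096, count rounds up to
 * 4096 and *roundoff = 584.  Without a stripe unit, count =
 * BBTOB(BTOBB(3512)) = 3584 and *roundoff = 72, which is indeed less
 * than BBTOB(1).
 */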
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) /*
 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
 * fashion. Before calling this routine, the caller should have moved the
 * current iclog pointer in the log to the next available iclog. This allows
 * further writes to continue while this code syncs out an iclog ready to go.
 * Before an in-core log can be written out, the data section must be scanned
 * to save away the first word of each BBSIZE block into the header. We replace
 * it with the current cycle count. Each BBSIZE block is tagged with the
 * cycle count because there is an implicit assumption that drives will
 * guarantee that entire 512 byte blocks get written at once. In other words,
 * we can't have part of a 512 byte block written and part not written. By
 * tagging each block, we will know which blocks are valid when recovering
 * after an unclean shutdown.
 *
 * This routine is single threaded on the iclog. No other thread can be in
 * this routine with the same iclog. Changing the contents of the iclog can
 * therefore be done without grabbing the state machine lock. Updating the
 * global log does, however, require grabbing the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) * The entire log manager uses a logical block numbering scheme. Only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * xlog_write_iclog knows about the fact that the log may not start with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * block zero on a given device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) xlog_sync(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) struct xlog_in_core *iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) unsigned int count; /* byte count of bwrite */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) unsigned int roundoff; /* roundoff to BB or stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) uint64_t bno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) unsigned int size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) bool need_flush = true, split = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) count = xlog_calc_iclog_size(log, iclog, &roundoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) /* move grant heads by roundoff in sync */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) /* put cycle number in every block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) xlog_pack_data(log, iclog, roundoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) /* real byte length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) size = iclog->ic_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) size += roundoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) iclog->ic_header.h_len = cpu_to_be32(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) XFS_STATS_INC(log->l_mp, xs_log_writes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) /* Do we need to split this write into 2 parts? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) if (bno + BTOBB(count) > log->l_logBBsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) xlog_split_iclog(log, &iclog->ic_header, bno, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) split = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
	/* calculate the checksum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) iclog->ic_datap, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * Intentionally corrupt the log record CRC based on the error injection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) * frequency, if defined. This facilitates testing log recovery in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) * event of torn writes. Hence, set the IOABORT state to abort the log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * write on I/O completion and shutdown the fs. The subsequent mount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) * detects the bad CRC and attempts to recover.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) iclog->ic_fail_crc = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) xfs_warn(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) be64_to_cpu(iclog->ic_header.h_lsn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) /*
	 * Flush the data device before flushing the log to make sure all
	 * metadata written back from the AIL actually made it to disk before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * stamping the new log tail LSN into the log buffer. For an external
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * log we need to issue the flush explicitly, and unfortunately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * synchronously here; for an internal log we can simply use the block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) * layer state machine for preflushes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) if (log->l_targ != log->l_mp->m_ddev_targp || split) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) need_flush = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) xlog_verify_iclog(log, iclog, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) xlog_write_iclog(log, iclog, bno, count, need_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) * Deallocate a log structure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) xlog_dealloc_log(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) xlog_in_core_t *iclog, *next_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) xlog_cil_destroy(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * Cycle all the iclogbuf locks to make sure all log IO completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * is done before we tear down these buffers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) for (i = 0; i < log->l_iclog_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) down(&iclog->ic_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) up(&iclog->ic_sema);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) for (i = 0; i < log->l_iclog_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) next_iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) kmem_free(iclog->ic_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) kmem_free(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) iclog = next_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) log->l_mp->m_log = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) destroy_workqueue(log->l_ioend_workqueue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) kmem_free(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) * Update counters atomically now that memcpy is done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) xlog_state_finish_copy(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) int record_cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) int copy_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) lockdep_assert_held(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) iclog->ic_offset += copy_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) * print out info relating to regions written which consume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) * the reservation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) xlog_print_tic_res(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) struct xlog_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) uint i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /* match with XLOG_REG_TYPE_* in xfs_log.h */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) #define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) static char *res_type_str[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) REG_TYPE_STR(BFORMAT, "bformat"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) REG_TYPE_STR(BCHUNK, "bchunk"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) REG_TYPE_STR(EFI_FORMAT, "efi_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) REG_TYPE_STR(EFD_FORMAT, "efd_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) REG_TYPE_STR(IFORMAT, "iformat"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) REG_TYPE_STR(ICORE, "icore"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) REG_TYPE_STR(IEXT, "iext"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) REG_TYPE_STR(IBROOT, "ibroot"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) REG_TYPE_STR(ILOCAL, "ilocal"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) REG_TYPE_STR(QFORMAT, "qformat"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) REG_TYPE_STR(DQUOT, "dquot"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) REG_TYPE_STR(QUOTAOFF, "quotaoff"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) REG_TYPE_STR(LRHEADER, "LR header"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) REG_TYPE_STR(UNMOUNT, "unmount"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) REG_TYPE_STR(COMMIT, "commit"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) REG_TYPE_STR(TRANSHDR, "trans header"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) REG_TYPE_STR(ICREATE, "inode create"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) REG_TYPE_STR(RUI_FORMAT, "rui_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) REG_TYPE_STR(RUD_FORMAT, "rud_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) REG_TYPE_STR(CUI_FORMAT, "cui_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) REG_TYPE_STR(CUD_FORMAT, "cud_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) REG_TYPE_STR(BUI_FORMAT, "bui_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) REG_TYPE_STR(BUD_FORMAT, "bud_format"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) #undef REG_TYPE_STR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) xfs_warn(mp, "ticket reservation summary:");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) xfs_warn(mp, " unit res = %d bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) ticket->t_unit_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) xfs_warn(mp, " current res = %d bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) ticket->t_curr_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) xfs_warn(mp, " total reg = %u bytes (o/flow = %u bytes)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) ticket->t_res_arr_sum, ticket->t_res_o_flow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) xfs_warn(mp, " ophdrs = %u (ophdr space = %u bytes)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) ticket->t_res_num_ophdrs, ophdr_spc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) xfs_warn(mp, " ophdr + reg = %u bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) xfs_warn(mp, " num regions = %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) ticket->t_res_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) for (i = 0; i < ticket->t_res_num; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) uint r_type = ticket->t_res_arr[i].r_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) xfs_warn(mp, "region[%u]: %s - %u bytes", i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) "bad-rtype" : res_type_str[r_type]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) ticket->t_res_arr[i].r_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) * Print a summary of the transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) xlog_print_trans(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) struct xfs_trans *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) struct xfs_mount *mp = tp->t_mountp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) struct xfs_log_item *lip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) /* dump core transaction and ticket info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) xfs_warn(mp, "transaction summary:");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) xfs_warn(mp, " log res = %d", tp->t_log_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) xfs_warn(mp, " log count = %d", tp->t_log_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) xfs_warn(mp, " flags = 0x%x", tp->t_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) xlog_print_tic_res(mp, tp->t_ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) /* dump each log item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) list_for_each_entry(lip, &tp->t_items, li_trans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) struct xfs_log_vec *lv = lip->li_lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) struct xfs_log_iovec *vec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) xfs_warn(mp, "log item: ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) xfs_warn(mp, " type = 0x%x", lip->li_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) xfs_warn(mp, " flags = 0x%lx", lip->li_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) if (!lv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) xfs_warn(mp, " size = %d", lv->lv_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) xfs_warn(mp, " bytes = %d", lv->lv_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) /* dump each iovec for the log item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) vec = lv->lv_iovecp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) for (i = 0; i < lv->lv_niovecs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) int dumplen = min(vec->i_len, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) xfs_warn(mp, " iovec[%d]", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) xfs_warn(mp, " type = 0x%x", vec->i_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) xfs_warn(mp, " len = %d", vec->i_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) xfs_warn(mp, " first %d bytes of iovec[%d]:", dumplen, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) xfs_hex_dump(vec->i_addr, dumplen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) vec++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) * Calculate the potential space needed by the log vector. We may need a start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) * record, and each region gets its own struct xlog_op_header and may need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) * double word aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) xlog_write_calc_vec_length(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) struct xlog_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) struct xfs_log_vec *log_vector,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) bool need_start_rec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) struct xfs_log_vec *lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) int headers = need_start_rec ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) int len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) for (lv = log_vector; lv; lv = lv->lv_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) /* we don't write ordered log vectors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) headers += lv->lv_niovecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) for (i = 0; i < lv->lv_niovecs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) struct xfs_log_iovec *vecp = &lv->lv_iovecp[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) len += vecp->i_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) ticket->t_res_num_ophdrs += headers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) len += headers * sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
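
/*
 * Worked example (assumed numbers): two log vectors carrying 2 and 3
 * iovecs of 64 bytes each, written with a start record, give
 *
 *	headers = 1 + 2 + 3 = 6
 *	len     = 5 * 64 + 6 * sizeof(struct xlog_op_header)
 *	        = 320 + 72 = 392 bytes
 *
 * assuming the usual 12 byte op header.  Ordered vectors contribute
 * neither headers nor payload.
 */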
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) xlog_write_start_rec(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) struct xlog_op_header *ophdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) struct xlog_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) ophdr->oh_clientid = ticket->t_clientid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) ophdr->oh_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) ophdr->oh_flags = XLOG_START_TRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) ophdr->oh_res2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) }
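
/*
 * The start record is just an op header with no payload: sizeof(struct
 * xlog_op_header) bytes carrying the tid, the client id and
 * XLOG_START_TRANS.  The caller therefore advances the write pointer by
 * exactly that size after writing it (see xlog_write() below).
 */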
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) static xlog_op_header_t *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) xlog_write_setup_ophdr(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) struct xlog_op_header *ophdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) struct xlog_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) uint flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) ophdr->oh_clientid = ticket->t_clientid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) ophdr->oh_res2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) /* are we copying a commit or unmount record? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) ophdr->oh_flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
	/*
	 * We've seen logs corrupted with bad transaction client ids. This
	 * makes sure that XFS doesn't generate them on its own. Turn this
	 * into an EIO and shut down the filesystem.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) switch (ophdr->oh_clientid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) case XFS_TRANSACTION:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) case XFS_VOLUME:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) case XFS_LOG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) xfs_warn(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) ophdr->oh_clientid, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) return ophdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155)
/*
 * Set up the parameters of the region copy into the log. This has to
 * handle a region write that is split across multiple log buffers; this
 * state is kept external to this function so that the code can be
 * written in an obvious, self-documenting manner.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) xlog_write_setup_copy(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) struct xlog_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) struct xlog_op_header *ophdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) int space_available,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) int space_required,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) int *copy_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) int *copy_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) int *last_was_partial_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) int *bytes_consumed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) int still_to_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) still_to_copy = space_required - *bytes_consumed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) *copy_off = *bytes_consumed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) if (still_to_copy <= space_available) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) /* write of region completes here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) *copy_len = still_to_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) ophdr->oh_len = cpu_to_be32(*copy_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) if (*last_was_partial_copy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) *last_was_partial_copy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) *bytes_consumed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) /* partial write of region, needs extra log op header reservation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) *copy_len = space_available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) ophdr->oh_len = cpu_to_be32(*copy_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) if (*last_was_partial_copy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) *bytes_consumed += *copy_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) (*last_was_partial_copy)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) /* account for new log op header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) ticket->t_curr_res -= sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) ticket->t_res_num_ophdrs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) return sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
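
/*
 * Example walk-through (assumed numbers): copying a 1000 byte region
 * with 300 bytes available in the first iclog:
 *
 *	call 1: still_to_copy = 1000 > 300, so copy_len = 300 and
 *		XLOG_CONTINUE_TRANS is set; bytes_consumed becomes 300
 *		and the extra op header reservation is returned;
 *	call 2: in the next iclog with 2000 bytes free, still_to_copy =
 *		700 fits, so copy_len = 700 and, because the last copy
 *		was partial, XLOG_END_TRANS|XLOG_WAS_CONT_TRANS are set
 *		and 0 is returned.
 */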
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) xlog_write_copy_finish(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) uint flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) int *record_cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) int *data_cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) int *partial_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) int *partial_copy_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) int log_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) struct xlog_in_core **commit_iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) if (*partial_copy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) * This iclog has already been marked WANT_SYNC by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * xlog_state_get_iclog_space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) *record_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) *data_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) goto release_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) *partial_copy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) *partial_copy_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) /* no more space in this iclog - push it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) *record_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) *data_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (iclog->ic_state == XLOG_STATE_ACTIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) xlog_state_switch_iclogs(log, iclog, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) iclog->ic_state == XLOG_STATE_IOERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) if (!commit_iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) goto release_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) ASSERT(flags & XLOG_COMMIT_TRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) *commit_iclog = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) release_iclog:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) error = xlog_state_release_iclog(log, iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) /*
 * Write some region out to the in-core log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) * This will be called when writing externally provided regions or when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * writing out a commit record for a given transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * General algorithm:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) * 1. Find total length of this write. This may include adding to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * lengths passed in.
 * 2. Check whether we violate the ticket's reservation.
 * 3. While writing to this iclog
 *    A. Reserve as much space in this iclog as we can get
 *    B. If this is the first write, save away the start lsn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) * C. While writing this region:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) * 1. If first write of transaction, write start record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) * 2. Write log operation header (header per region)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) * 3. Find out if we can fit entire region into this iclog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) * 4. Potentially, verify destination memcpy ptr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) * 5. Memcpy (partial) region
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) * 6. If partial copy, release iclog; otherwise, continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) * copying more regions into current iclog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) * 4. Mark want sync bit (in simulation mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) * 5. Release iclog for potential flush to on-disk log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) * ERRORS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) * 1. Panic if reservation is overrun. This should never happen since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) * reservation amounts are generated internal to the filesystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) * NOTES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) * 1. Tickets are single threaded data structures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) * 2. The XLOG_END_TRANS & XLOG_CONTINUE_TRANS flags are passed down to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) * syncing routine. When a single log_write region needs to span
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) * multiple in-core logs, the XLOG_CONTINUE_TRANS bit should be set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * on all log operation writes which don't contain the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * region. The XLOG_END_TRANS bit is used for the in-core log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) * operation which contains the end of the continued log_write region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) * 3. When xlog_state_get_iclog_space() grabs the rest of the current iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) * we don't really know exactly how much space will be used. As a result,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) * we don't update ic_offset until the end when we know exactly how many
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) * bytes have been written out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) xlog_write(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) struct xfs_log_vec *log_vector,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) struct xlog_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) xfs_lsn_t *start_lsn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) struct xlog_in_core **commit_iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) uint flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) bool need_start_rec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) struct xlog_in_core *iclog = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) struct xfs_log_vec *lv = log_vector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) struct xfs_log_iovec *vecp = lv->lv_iovecp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) int index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) int partial_copy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) int partial_copy_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) int contwr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) int record_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) int data_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) * If this is a commit or unmount transaction, we don't need a start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) * record to be written. We do, however, have to account for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) * commit or unmount header that gets written. Hence we always have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) * to account for an extra xlog_op_header here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) ticket->t_curr_res -= sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) if (ticket->t_curr_res < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) "ctx ticket reservation ran out. Need to up reservation");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) xlog_print_tic_res(log->l_mp, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) *start_lsn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) int log_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) &contwr, &log_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) ASSERT(log_offset <= iclog->ic_size - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) ptr = iclog->ic_datap + log_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) /* start_lsn is the first lsn written to. That's all we need. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) if (!*start_lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) * This loop writes out as many regions as can fit in the amount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) * of space which was allocated by xlog_state_get_iclog_space().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) struct xfs_log_iovec *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) struct xlog_op_header *ophdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) int copy_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) int copy_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) bool ordered = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) /* ordered log vectors have no regions to write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) ASSERT(lv->lv_niovecs == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) ordered = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) goto next_lv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) reg = &vecp[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) ASSERT(reg->i_len % sizeof(int32_t) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) * Before we start formatting log vectors, we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) * write a start record. Only do this for the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) * iclog we write to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) if (need_start_rec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) xlog_write_start_rec(ptr, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) xlog_write_adv_cnt(&ptr, &len, &log_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) sizeof(struct xlog_op_header));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) if (!ophdr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) xlog_write_adv_cnt(&ptr, &len, &log_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) sizeof(struct xlog_op_header));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) len += xlog_write_setup_copy(ticket, ophdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) iclog->ic_size-log_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) reg->i_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) &copy_off, &copy_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) &partial_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) &partial_copy_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) xlog_verify_dest_ptr(log, ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) * Copy region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) * Unmount records just log an opheader, so they can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) * have empty payloads with no data region to copy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) * Hence we only copy the payload if the vector says
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) * it has data to copy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) ASSERT(copy_len >= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (copy_len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) memcpy(ptr, reg->i_addr + copy_off, copy_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) xlog_write_adv_cnt(&ptr, &len, &log_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) copy_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) copy_len += sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) record_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) if (need_start_rec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) copy_len += sizeof(struct xlog_op_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) record_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) need_start_rec = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) }
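^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * Only account the copied bytes in data_cnt for continued
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * writes; when the write fits in this iclog, ic_offset was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * already advanced by xlog_state_get_iclog_space().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) */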
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) data_cnt += contwr ? copy_len : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) error = xlog_write_copy_finish(log, iclog, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) &record_cnt, &data_cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) &partial_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) &partial_copy_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) log_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) commit_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) * if we had a partial copy, we need to get more iclog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) * space but we don't want to increment the region
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) * index because there is still more in this region to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) * write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) * If we completed writing this region, and we flushed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) * the iclog (indicated by resetting of the record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) * count), then we also need to get more log space. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) * this was the last record, though, we are done and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) * can just return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (partial_copy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) if (++index == lv->lv_niovecs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) next_lv:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) lv = lv->lv_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) if (lv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) vecp = lv->lv_iovecp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) }
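^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) * A record count of zero means the iclog was just flushed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) * by xlog_write_copy_finish(); get more iclog space unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) * the whole chain has now been written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) */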
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) if (record_cnt == 0 && !ordered) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) if (!lv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) ASSERT(len == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) if (commit_iclog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) ASSERT(flags & XLOG_COMMIT_TRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) *commit_iclog = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) error = xlog_state_release_iclog(log, iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) xlog_state_activate_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) int *iclogs_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) ASSERT(list_empty_careful(&iclog->ic_callbacks));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) * If the number of ops in this iclog indicates it just contains the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) * dummy transaction, we can change state into IDLE (the second time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) * around). Otherwise we should change the state to show that we NEED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) * a dummy. We don't need to cover the dummy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) if (*iclogs_changed == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) *iclogs_changed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) * We have two dirty iclogs so start over. This could also be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) * the op count indicating this is not the dummy going out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) *iclogs_changed = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) iclog->ic_state = XLOG_STATE_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) iclog->ic_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) iclog->ic_header.h_num_logops = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) memset(iclog->ic_header.h_cycle_data, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) sizeof(iclog->ic_header.h_cycle_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) iclog->ic_header.h_lsn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) * Loop through all iclogs and mark all iclogs currently marked DIRTY as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) * ACTIVE after iclog I/O has completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) xlog_state_activate_iclogs(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) int *iclogs_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) struct xlog_in_core *iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) if (iclog->ic_state == XLOG_STATE_DIRTY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) xlog_state_activate_iclog(iclog, iclogs_changed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) * The ordering of marking iclogs ACTIVE must be maintained, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) * an iclog doesn't become ACTIVE beyond one that is SYNCING.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) else if (iclog->ic_state != XLOG_STATE_ACTIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) } while ((iclog = iclog->ic_next) != log->l_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) xlog_covered_state(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) int prev_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) int iclogs_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) * We usually go to NEED. But we go to NEED2 if the change indicates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) * we are done writing the dummy record. If we are done with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) * second dummy record (DONE2), then we go to IDLE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) */
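^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) * In short, a completed dummy write (iclogs_changed == 1) drives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) * DONE -> NEED2 and DONE2 -> IDLE; every other case falls back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) * to NEED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) */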
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) switch (prev_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) case XLOG_STATE_COVER_IDLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) case XLOG_STATE_COVER_NEED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) case XLOG_STATE_COVER_NEED2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) case XLOG_STATE_COVER_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) if (iclogs_changed == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) return XLOG_STATE_COVER_NEED2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) case XLOG_STATE_COVER_DONE2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) if (iclogs_changed == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) return XLOG_STATE_COVER_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) ASSERT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) return XLOG_STATE_COVER_NEED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) xlog_state_clean_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) struct xlog_in_core *dirty_iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) int iclogs_changed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) dirty_iclog->ic_state = XLOG_STATE_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573)
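^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) * The activation scan below moves this iclog, and any other DIRTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) * iclogs, back to ACTIVE in ring order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) */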
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) xlog_state_activate_iclogs(log, &iclogs_changed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) wake_up_all(&dirty_iclog->ic_force_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) if (iclogs_changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) log->l_covered_state = xlog_covered_state(log->l_covered_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) iclogs_changed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) STATIC xfs_lsn_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) xlog_get_lowest_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) struct xlog_in_core *iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) xfs_lsn_t lowest_lsn = 0, lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) if (iclog->ic_state == XLOG_STATE_ACTIVE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) iclog->ic_state == XLOG_STATE_DIRTY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) lsn = be64_to_cpu(iclog->ic_header.h_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) lowest_lsn = lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) } while ((iclog = iclog->ic_next) != log->l_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) return lowest_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * Completion of an iclog IO does not imply that a transaction has completed, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * transactions can be large enough to span many iclogs. We cannot change the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) * tail of the log half way through a transaction as this may be the only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) * transaction in the log and moving the tail to point to the middle of it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) * will prevent recovery from finding the start of the transaction. Hence we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) * should only update the last_sync_lsn if this iclog contains transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) * completion callbacks on it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) * We have to do this before we drop the icloglock to ensure we are the only one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) * that can update it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) * the reservation grant head pushing. This is because the push target is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) * bound by the current last_sync_lsn value. Hence if we have a large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) * amount of log space bound up in this committing transaction then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) * last_sync_lsn value may be the limiting factor preventing tail pushing from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) * freeing space in the log. Hence once we've updated the last_sync_lsn we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) * should push the AIL to ensure the push target (and hence the grant head) is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) * no longer bound by the old log head location and can move forwards and make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) * progress again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) xlog_state_set_callback(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) xfs_lsn_t header_lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) iclog->ic_state = XLOG_STATE_CALLBACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) header_lsn) <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) if (list_empty_careful(&iclog->ic_callbacks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) atomic64_set(&log->l_last_sync_lsn, header_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) xlog_grant_push_ail(log, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) * Return true if we need to stop processing, false to continue to the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) * iclog. The caller will need to run callbacks if the iclog is returned in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) * XLOG_STATE_CALLBACK state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) xlog_state_iodone_process_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) bool *ioerror)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) xfs_lsn_t lowest_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) xfs_lsn_t header_lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) switch (iclog->ic_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) case XLOG_STATE_ACTIVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) case XLOG_STATE_DIRTY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) * Skip all iclogs in the ACTIVE & DIRTY states:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) case XLOG_STATE_IOERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) * Between marking a filesystem SHUTDOWN and stopping the log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) * we do flush all iclogs to disk (if there wasn't a log I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) * error). So, we do want things to go smoothly in case of just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) * a SHUTDOWN w/o a LOG_IO_ERROR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) *ioerror = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) case XLOG_STATE_DONE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) * Now that we have an iclog that is in the DONE_SYNC state, do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) * one more check here to see if we have chased our tail around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) * If this is not the lowest lsn iclog, then we will leave it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) * for another completion to process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) lowest_lsn = xlog_get_lowest_lsn(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) xlog_state_set_callback(log, iclog, header_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) * Can only perform callbacks in order. Since this iclog is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) * in the DONE_SYNC state, we skip the rest and just try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) * clean up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) * Keep processing entries in the iclog callback list until we come around and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) * it is empty. We need to atomically see that the list is empty and change the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) * state to DIRTY so that we don't miss any more callbacks being added.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) * This function is called with the icloglock held and returns with it held. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) * drop it while running callbacks, however, as holding it over thousands of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) * callbacks is unnecessary and causes excessive contention if we do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) xlog_state_do_iclog_callbacks(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) struct xlog_in_core *iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) __releases(&log->l_icloglock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) __acquires(&log->l_icloglock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) spin_lock(&iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) while (!list_empty(&iclog->ic_callbacks)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) LIST_HEAD(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) list_splice_init(&iclog->ic_callbacks, &tmp);
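^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) * The callbacks were spliced onto a private list above so they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) * can be run after dropping ic_callback_lock; new callbacks can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) * still be added to the now-empty iclog list in the meantime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) */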
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) spin_unlock(&iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) xlog_cil_process_committed(&tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) spin_lock(&iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) * Pick up the icloglock while still holding the callback lock so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) * serialise against anyone trying to add more callbacks to this iclog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * now we've finished processing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) spin_unlock(&iclog->ic_callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) xlog_state_do_callback(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) struct xlog_in_core *iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) struct xlog_in_core *first_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) bool cycled_icloglock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) bool ioerror;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) int flushcnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) int repeats = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) * Scan all iclogs starting with the one pointed to by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) * log. Reset this starting point each time the log is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) * unlocked (during callbacks).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) * Keep looping through iclogs until one full pass is made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) * without running any callbacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) first_iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) cycled_icloglock = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) ioerror = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) repeats++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) if (xlog_state_iodone_process_iclog(log, iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) &ioerror))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) if (iclog->ic_state != XLOG_STATE_CALLBACK &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) iclog->ic_state != XLOG_STATE_IOERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) * Running callbacks will drop the icloglock which means
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) * we'll have to run at least one more complete loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) cycled_icloglock = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) xlog_state_do_iclog_callbacks(log, iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) if (XLOG_FORCED_SHUTDOWN(log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) wake_up_all(&iclog->ic_force_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) xlog_state_clean_iclog(log, iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) } while (first_iclog != iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) if (repeats > 5000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) flushcnt += repeats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) repeats = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) xfs_warn(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) "%s: possible infinite loop (%d iterations)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) __func__, flushcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) } while (!ioerror && cycled_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) log->l_iclog->ic_state == XLOG_STATE_IOERROR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) wake_up_all(&log->l_flush_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) * Finish transitioning this iclog to the dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) * Make sure that we completely execute this routine only when this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) * the last call for the iclog. There is a good chance that iclog flushes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) * when we reach the end of the physical log, get turned into two separate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) * I/Os. Hence, one iclog flush could generate two I/O completions, and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) * completion path guarantees that this routine is only invoked once, after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) * the final completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) * Callbacks could take time, so they are done outside the scope of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) * global state machine log lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) xlog_state_done_syncing(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) struct xlog_in_core *iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) struct xlog *log = iclog->ic_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) * If we got an error, either on the first buffer, or in the case of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) * split log writes, on the second, we shut down the file system and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) * no iclogs should ever be attempted to be written to disk again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) if (!XLOG_FORCED_SHUTDOWN(log)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) ASSERT(iclog->ic_state == XLOG_STATE_SYNCING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) iclog->ic_state = XLOG_STATE_DONE_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) * Someone could be sleeping prior to writing out the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) * iclog buffer: we wake them all. One will get to do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) * I/O; the others get to wait for the result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) wake_up_all(&iclog->ic_write_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) xlog_state_do_callback(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) * sleep. We wait on the flush queue on the head iclog as that should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) * the first iclog to complete flushing. Hence if all iclogs are syncing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) * we will wait here and all new writes will sleep until a sync completes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) * The in-core logs are used in a circular fashion. They are not used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) * out-of-order even when an iclog past the head is free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) * return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) * * log_offset where xlog_write() can start writing into the in-core
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) * log's data space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) * * in-core log pointer to which xlog_write() should write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) * * boolean indicating this is a continued write to an in-core log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) * If this is the last write, then the in-core log's offset field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) * needs to be incremented, depending on the amount of data which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) * is copied.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) xlog_state_get_iclog_space(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) int len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) struct xlog_in_core **iclogp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) struct xlog_ticket *ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) int *continued_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) int *logoffsetp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) int log_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) xlog_rec_header_t *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) xlog_in_core_t *iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) if (XLOG_FORCED_SHUTDOWN(log)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) if (iclog->ic_state != XLOG_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) XFS_STATS_INC(log->l_mp, xs_log_noiclogs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) /* Wait for log writes to have flushed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) xlog_wait(&log->l_flush_wait, &log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) head = &iclog->ic_header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) atomic_inc(&iclog->ic_refcnt); /* prevents sync */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) log_offset = iclog->ic_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) /* On the first write to an iclog, figure out the lsn. This works
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) * because iclogs marked XLOG_STATE_WANT_SYNC always write out what
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) * they are committing to. If the offset is already set, it tells
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) * us how much data must be written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) if (log_offset == 0) {
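^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) * The first write into an iclog pays for the record header
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) * out of this ticket's current reservation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) */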
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) ticket->t_curr_res -= log->l_iclog_hsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) xlog_tic_add_region(ticket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) log->l_iclog_hsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) XLOG_REG_TYPE_LRHEADER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) head->h_cycle = cpu_to_be32(log->l_curr_cycle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) head->h_lsn = cpu_to_be64(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) ASSERT(log->l_curr_block >= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) /* If there is enough room to write everything, then do it. Otherwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) * claim the rest of the region and make sure the XLOG_STATE_WANT_SYNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) * bit is on, so this will get flushed out. Don't update ic_offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) * until we know exactly how many bytes get copied, so the update is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) * deferred until later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) * The xlog_write() algorithm assumes that at least two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) * xlog_op_header_t's can fit into the remaining data section.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) * If we are the only one writing to this iclog, sync it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) * disk. We need to do an atomic compare and decrement here to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) * avoid racing with concurrent atomic_dec_and_lock() calls in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) * xlog_state_release_iclog() when there is more than one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) * reference to the iclog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) error = xlog_state_release_iclog(log, iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) /* Do we have enough room to write the full amount in the remainder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) * of this iclog? Or must we continue a write on the next iclog and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) * mark this iclog as completely taken? In the case where we switch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) * iclogs (to mark it taken), this particular iclog will release/sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) * to disk in xlog_write().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) if (len <= iclog->ic_size - iclog->ic_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) *continued_write = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) iclog->ic_offset += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) *continued_write = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) *iclogp = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) ASSERT(iclog->ic_offset <= iclog->ic_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) *logoffsetp = log_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) * The first cnt-1 times a ticket goes through here we don't need to move the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) * grant write head because the permanent reservation has reserved cnt times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) * the unit amount. Release the unused part of the current permanent unit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) * reservation and reset the current reservation to one unit's worth. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) * move the grant reservation head forward as needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) */
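^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * For example, a permanent ticket with t_cnt == 3 and unit reservation U
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * passes through here twice giving back only its unused t_curr_res; on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * third pass t_cnt reaches zero and a further U is added back onto the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * reserve grant head to back the replenished current reservation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) */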
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) xfs_log_ticket_regrant(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) struct xlog_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) trace_xfs_log_ticket_regrant(log, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) if (ticket->t_cnt > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) ticket->t_cnt--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) xlog_grant_sub_space(log, &log->l_reserve_head.grant,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) ticket->t_curr_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) xlog_grant_sub_space(log, &log->l_write_head.grant,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) ticket->t_curr_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) ticket->t_curr_res = ticket->t_unit_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) xlog_tic_reset_res(ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) trace_xfs_log_ticket_regrant_sub(log, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) * Just return if we still have some of the pre-reserved space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) * otherwise regrant a full unit's worth of reservation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) if (!ticket->t_cnt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) xlog_grant_add_space(log, &log->l_reserve_head.grant,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) ticket->t_unit_res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) trace_xfs_log_ticket_regrant_exit(log, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) ticket->t_curr_res = ticket->t_unit_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) xlog_tic_reset_res(ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) xfs_log_ticket_put(ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) * Give back the space left from a reservation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) * All the information we need to make a correct determination of space left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) * is present. For non-permanent reservations, things are quite easy. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) * count should have been decremented to zero. We only need to deal with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) * space remaining in the current reservation part of the ticket. If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) * ticket contains a permanent reservation, there may be left over space which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) * needs to be released. A count of N means that N-1 refills of the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) * reservation can be done before we need to ask for more space. The first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) * one goes to fill up the first current reservation. Once we run out of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) * space, the count will stay at zero and the only space remaining will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) * in the current reservation field.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) */
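^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) * For example, a permanent ticket arriving here with t_cnt == 2 and unit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) * reservation U gives back t_curr_res + U: the decrement below consumes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) * one count, and each count still remaining is worth a full unit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) */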
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) xfs_log_ticket_ungrant(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) struct xlog_ticket *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) int bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) trace_xfs_log_ticket_ungrant(log, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) if (ticket->t_cnt > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) ticket->t_cnt--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) trace_xfs_log_ticket_ungrant_sub(log, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) * If this is a permanent reservation ticket, we may be able to free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) * up more space based on the remaining count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) bytes = ticket->t_curr_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) if (ticket->t_cnt > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) bytes += ticket->t_unit_res*ticket->t_cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) trace_xfs_log_ticket_ungrant_exit(log, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) xfs_log_space_wake(log->l_mp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) xfs_log_ticket_put(ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) * This routine will mark the current iclog in the ring as WANT_SYNC and move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) * the current iclog pointer to the next iclog in the ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) xlog_state_switch_iclogs(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) int eventual_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) assert_spin_locked(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) if (!eventual_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) eventual_size = iclog->ic_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) iclog->ic_state = XLOG_STATE_WANT_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) iclog->ic_header.h_prev_block = cpu_to_be32(log->l_prev_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) log->l_prev_block = log->l_curr_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) log->l_prev_cycle = log->l_curr_cycle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) /* roll log?: ic_offset changed later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) /* Round up to next log-sunit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) log->l_mp->m_sb.sb_logsunit > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) if (log->l_curr_block >= log->l_logBBsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) * Rewind the current block before the cycle is bumped to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) * sure that the combined LSN never transiently moves forward
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) * when the log wraps to the next cycle. This is to support the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) * unlocked sample of these fields from xlog_valid_lsn(). Most
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) * other cases should acquire l_icloglock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) log->l_curr_block -= log->l_logBBsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) ASSERT(log->l_curr_block >= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) log->l_curr_cycle++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) log->l_curr_cycle++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) ASSERT(iclog == log->l_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) log->l_iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) * Write out all data in the in-core log as of this exact moment in time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) * Data may be written to the in-core log during this call. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) * we don't guarantee this data will be written out. A change from past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) * implementation means this routine will *not* write out zero length LRs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) * Basically, we try and perform an intelligent scan of the in-core logs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) * If we determine there is no flushable data, we just return. There is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) * flushable data if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) * 1. the current iclog is active and has no data; the previous iclog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) * is in the active or dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) * 2. the current iclog is dirty, and the previous iclog is in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) * active or dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) * We may sleep if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) * 1. the current iclog is in neither the active nor dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) * 2. the current iclog is dirty, and the previous iclog is in neither
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) * the active nor dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) * 3. the current iclog is active, and there is another thread writing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) * to this particular iclog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) * 4. a) the current iclog is active and has no other writers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) * b) when we return from flushing out this iclog, it is still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) * not in the active or dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) xfs_log_force(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) uint flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) struct xlog_in_core *iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) xfs_lsn_t lsn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) XFS_STATS_INC(mp, xs_log_force);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) trace_xfs_log_force(mp, 0, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) xlog_cil_force(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) if (iclog->ic_state == XLOG_STATE_IOERROR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) goto out_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) if (iclog->ic_state == XLOG_STATE_DIRTY ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) (iclog->ic_state == XLOG_STATE_ACTIVE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) * If the head is dirty or (active and empty), then we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) * look at the previous iclog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) * If the previous iclog is active or dirty we are done. There
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) * is nothing to sync out. Otherwise, we attach ourselves to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) * previous iclog and go to sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) iclog = iclog->ic_prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) if (atomic_read(&iclog->ic_refcnt) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) * We are the only one with access to this iclog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) * Flush it out now. There should be a roundoff of zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) * to show that someone has already taken care of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) * roundoff from the previous sync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) atomic_inc(&iclog->ic_refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) lsn = be64_to_cpu(iclog->ic_header.h_lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) xlog_state_switch_iclogs(log, iclog, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) if (xlog_state_release_iclog(log, iclog))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) goto out_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) * Someone else is writing to this iclog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) * Use its call to flush out the data. However, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) * other thread may not force out this LR, so we mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) * it WANT_SYNC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) xlog_state_switch_iclogs(log, iclog, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) * If the head iclog is neither active nor dirty, we just attach
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) * ourselves to the head and go to sleep if necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) if (flags & XFS_LOG_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) return xlog_wait_on_iclog(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) out_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) }
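
/*
 * Illustrative sketch, not part of the original source: a typical caller
 * that needs everything committed so far on stable storage issues a
 * synchronous force and propagates the error. The helper name
 * xfs_example_force_log_sync is hypothetical.
 */
static inline int __maybe_unused
xfs_example_force_log_sync(
	struct xfs_mount	*mp)
{
	/* Push the CIL, then wait for the iclogs to reach the disk. */
	return xfs_log_force(mp, XFS_LOG_SYNC);
}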
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) __xfs_log_force_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) xfs_lsn_t lsn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) uint flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) int *log_flushed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) bool already_slept)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) struct xlog_in_core *iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) if (iclog->ic_state == XLOG_STATE_IOERROR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) goto out_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) if (iclog == log->l_iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) if (iclog->ic_state == XLOG_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) * We sleep here if we haven't already slept (e.g. this is the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) * first time we've looked at the correct iclog buf) and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) * buffer before us is going to be sync'ed. The reason for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) * is that if we are doing sync transactions here, by waiting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) * for the previous I/O to complete, we can allow a few more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) * transactions into this iclog before we close it down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) * Otherwise, we mark the buffer WANT_SYNC, and bump up the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) * refcnt so we can release the log (which drops the ref count).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) * The state switch keeps new transaction commits from using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) * this buffer. When the current commits finish writing into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) * the buffer, the refcount will drop to zero and the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) * will go out then.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) if (!already_slept &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) XFS_STATS_INC(mp, xs_log_force_sleep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) xlog_wait(&iclog->ic_prev->ic_write_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) &log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) atomic_inc(&iclog->ic_refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) xlog_state_switch_iclogs(log, iclog, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) if (xlog_state_release_iclog(log, iclog))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) goto out_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) if (log_flushed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) *log_flushed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) if (flags & XFS_LOG_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) return xlog_wait_on_iclog(iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) out_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) * Force the in-core log to disk for a specific LSN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) * Find in-core log with lsn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) * If it is in the DIRTY state, just return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) * state and go to sleep or return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) * If it is in any other state, go to sleep or return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) * Synchronous forces are implemented with a wait queue. All callers trying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) * to force a given lsn to disk must wait on the queue attached to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) * specific in-core log. When the given in-core log finally completes its write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) * to disk, that thread will wake up all threads waiting on the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) xfs_log_force_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) xfs_lsn_t lsn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) uint flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) int *log_flushed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) ASSERT(lsn != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) XFS_STATS_INC(mp, xs_log_force);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) trace_xfs_log_force(mp, lsn, _RET_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) lsn = xlog_cil_force_lsn(mp->m_log, lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) if (lsn == NULLCOMMITLSN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) if (ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) }
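
/*
 * Illustrative sketch, not part of the original source: a caller that
 * remembered a commit LSN and must wait for it to reach stable storage,
 * e.g. for a synchronous transaction. The helper name is hypothetical.
 */
static inline int __maybe_unused
xfs_example_wait_for_commit(
	struct xfs_mount	*mp,
	xfs_lsn_t		commit_lsn)
{
	int			log_flushed = 0;

	/* Returns 0 once everything up to commit_lsn is on disk. */
	return xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, &log_flushed);
}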
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) * Free a used ticket when its refcount falls to zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) xfs_log_ticket_put(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) xlog_ticket_t *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) ASSERT(atomic_read(&ticket->t_ref) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) if (atomic_dec_and_test(&ticket->t_ref))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) kmem_cache_free(xfs_log_ticket_zone, ticket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) xlog_ticket_t *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) xfs_log_ticket_get(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) xlog_ticket_t *ticket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) ASSERT(atomic_read(&ticket->t_ref) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) atomic_inc(&ticket->t_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) return ticket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) }
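
/*
 * Illustrative sketch, not part of the original source: the get/put pair
 * above follows the usual refcounting pattern - take a reference before
 * handing the ticket to another context, drop it when done, and the
 * ticket is freed when the last reference goes away.
 */
static void __maybe_unused
xfs_example_ticket_refcounting(
	struct xlog_ticket	*tic)
{
	struct xlog_ticket	*ref;

	ref = xfs_log_ticket_get(tic);	/* refcount: n -> n + 1 */
	/* ... the ticket can be used safely here ... */
	xfs_log_ticket_put(ref);	/* refcount: n + 1 -> n */
}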
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) * Figure out the total log space unit (in bytes) that would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) * required for a log ticket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) xfs_log_calc_unit_res(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) int unit_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) int iclog_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) uint num_headers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) * Permanent reservations have up to 'cnt'-1 active log operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) * in the log. A unit in this case is the amount of space for one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) * of these log operations. Normal reservations have a cnt of 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) * and their unit amount is the total amount of space required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) * The following lines of code account for non-transaction data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) * which occupies space in the on-disk log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) * Normal form of a transaction is:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) * and then there are LR hdrs, split-recs and roundoff at end of syncs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) * We need to account for all the leadup data and trailer data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) * around the transaction data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) * And then we need to account for the worst case in terms of using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) * more space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) * The worst case will happen if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) * - the placement of the transaction happens to be such that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) * roundoff is at its maximum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) * - the transaction data is synced before the commit record is synced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) * i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) * Therefore the commit record is in its own Log Record.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) * This can happen as the commit record is passed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) * xlog_write() in its own region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) * This then means that in the worst case, roundoff can happen for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) * the commit-rec as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) * The commit-rec is smaller than padding in this scenario and so it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) * not added separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) /* for trans header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) unit_bytes += sizeof(xlog_op_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) unit_bytes += sizeof(xfs_trans_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) /* for start-rec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) unit_bytes += sizeof(xlog_op_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) * for LR headers - the space for data in an iclog is the size minus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) * the space used for the headers. If we use the iclog size, then we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) * undercalculate the number of headers required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) * Furthermore - the addition of op headers for split-recs might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) * increase the space required enough to require more log and op
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) * headers, so take that into account too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) * IMPORTANT: This reservation makes the assumption that if this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) * transaction is the first in an iclog and hence has the LR headers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) * accounted to it, then the remaining space in the iclog is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) * exclusively for this transaction. i.e. if the transaction is larger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) * than the iclog, it will be the only thing in that iclog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) * Fundamentally, this means we must pass the entire log vector to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) * xlog_write to guarantee this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) iclog_space = log->l_iclog_size - log->l_iclog_hsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) num_headers = howmany(unit_bytes, iclog_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) /* for split-recs - ophdrs added when data split over LRs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) unit_bytes += sizeof(xlog_op_header_t) * num_headers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) /* add extra header reservations if we overrun */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) while (!num_headers ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) howmany(unit_bytes, iclog_space) > num_headers) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) unit_bytes += sizeof(xlog_op_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) num_headers++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) unit_bytes += log->l_iclog_hsize * num_headers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) /* for commit-rec LR header - note: padding will subsume the ophdr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) unit_bytes += log->l_iclog_hsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) /* for roundoff padding for transaction data and one for commit record */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) /* log su roundoff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) unit_bytes += 2 * mp->m_sb.sb_logsunit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) /* BB roundoff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) unit_bytes += 2 * BBSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) return unit_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) }
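
/*
 * Illustrative sketch, not part of the original source: a permanent
 * (rolling) reservation grants roughly unit_res * cnt bytes up front,
 * one worst-case unit per expected transaction roll. The helper name
 * is hypothetical.
 */
static inline int __maybe_unused
xfs_example_total_ticket_res(
	struct xfs_mount	*mp,
	int			unit_bytes,
	int			cnt)
{
	return xfs_log_calc_unit_res(mp, unit_bytes) * cnt;
}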
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) * Allocate and initialise a new log ticket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) struct xlog_ticket *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) xlog_ticket_alloc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) int unit_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) int cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) char client,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) bool permanent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) struct xlog_ticket *tic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) int unit_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) atomic_set(&tic->t_ref, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) tic->t_task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) INIT_LIST_HEAD(&tic->t_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) tic->t_unit_res = unit_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) tic->t_curr_res = unit_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) tic->t_cnt = cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) tic->t_ocnt = cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) tic->t_tid = prandom_u32();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) tic->t_clientid = client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) if (permanent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) tic->t_flags |= XLOG_TIC_PERM_RESERV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) xlog_tic_reset_res(tic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) return tic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) }
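
/*
 * Illustrative sketch, not part of the original source: allocating a
 * rolling (permanent) ticket on behalf of a transaction client, roughly
 * what the reservation path does when a transaction may roll. The helper
 * name is hypothetical.
 */
static struct xlog_ticket * __maybe_unused
xlog_example_alloc_perm_ticket(
	struct xlog		*log,
	int			unit_bytes,
	int			cnt)
{
	/* XFS_TRANSACTION tags the ophdrs as transaction data. */
	return xlog_ticket_alloc(log, unit_bytes, cnt, XFS_TRANSACTION, true);
}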
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) #if defined(DEBUG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) * Make sure that the destination ptr is within the valid data region of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) * one of the iclogs. This uses backup pointers stored in a different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) * part of the log in case we trash the log structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) xlog_verify_dest_ptr(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) void *ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) int good_ptr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) for (i = 0; i < log->l_iclog_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) if (ptr >= log->l_iclog_bak[i] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) good_ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) if (!good_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) * Check to make sure the grant write head didn't just overlap the tail. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) * the cycles are the same, we can't be overlapping. Otherwise, make sure that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) * the cycles differ by exactly one and check the byte count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) * This check is run unlocked, so it can give false positives. Rather than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) * assert on failures, use a warn-once flag and a panic tag to allow the admin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) * to determine if they want to panic the machine when such an error occurs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) * For debug kernels this will have the same effect as using an assert but,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) * unlike an assert, it can be turned off at runtime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) xlog_verify_grant_tail(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) int tail_cycle, tail_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) int cycle, space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) if (tail_cycle != cycle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) if (cycle - 1 != tail_cycle &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) !(log->l_flags & XLOG_TAIL_WARN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) "%s: cycle - 1 != tail_cycle", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) log->l_flags |= XLOG_TAIL_WARN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) if (space > BBTOB(tail_blocks) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) !(log->l_flags & XLOG_TAIL_WARN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) "%s: space > BBTOB(tail_blocks)", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) log->l_flags |= XLOG_TAIL_WARN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) /* check that this iclog write will not overwrite the log tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) xlog_verify_tail_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) xfs_lsn_t tail_lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) int blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) blocks = log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) if (blocks < BTOBB(iclog->ic_offset) + BTOBB(log->l_iclog_hsize))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) ASSERT(CYCLE_LSN(tail_lsn) + 1 == log->l_prev_cycle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) if (blocks < BTOBB(iclog->ic_offset) + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) * Perform a number of checks on the iclog before writing to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) * 1. Make sure the iclogs are still circular
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) * 2. Make sure we have a good magic number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) * 3. Make sure we don't have magic numbers in the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) * 4. Check fields of each log operation header for:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) * A. Valid client identifier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) * B. tid ptr value falls in valid ptr space (user space code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) * C. Length in log record header is correct according to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) * individual operation headers within record.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) * 5. When a bwrite will occur within 5 blocks of the front of the physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) * log, check the preceding blocks of the physical log to make sure all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) * the cycle numbers agree with the current cycle number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) STATIC void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) xlog_verify_iclog(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) struct xlog *log,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) struct xlog_in_core *iclog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) xlog_op_header_t *ophead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) xlog_in_core_t *icptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) xlog_in_core_2_t *xhdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) void *base_ptr, *ptr, *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) ptrdiff_t field_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) uint8_t clientid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) int len, i, j, k, op_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) /* check validity of iclog pointers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) icptr = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) ASSERT(icptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) if (icptr != log->l_iclog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) /* check log magic numbers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) base_ptr = ptr = &iclog->ic_header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) p = &iclog->ic_header;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) xfs_emerg(log->l_mp, "%s: unexpected magic num",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) /* check fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) len = be32_to_cpu(iclog->ic_header.h_num_logops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) base_ptr = ptr = iclog->ic_datap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) ophead = ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) xhdr = iclog->ic_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) for (i = 0; i < len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) ophead = ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) /* clientid is only 1 byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) p = &ophead->oh_clientid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) field_offset = p - base_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) if (field_offset & 0x1ff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) clientid = ophead->oh_clientid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) clientid = xlog_get_client_id(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) xhdr[j].hic_xheader.xh_cycle_data[k]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) clientid = xlog_get_client_id(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) iclog->ic_header.h_cycle_data[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) xfs_warn(log->l_mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) __func__, clientid, ophead,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) (unsigned long)field_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) /* check length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) p = &ophead->oh_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) field_offset = p - base_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) if (field_offset & 0x1ff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) op_len = be32_to_cpu(ophead->oh_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) idx = BTOBBT((uintptr_t)&ophead->oh_len -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) (uintptr_t)iclog->ic_datap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) op_len = be32_to_cpu(xhdr[j].hic_xheader.xh_cycle_data[k]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) op_len = be32_to_cpu(iclog->ic_header.h_cycle_data[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) ptr += sizeof(xlog_op_header_t) + op_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) * Mark all iclogs IOERROR. l_icloglock is held by the caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) xlog_state_ioerror(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) xlog_in_core_t *iclog, *ic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) if (iclog->ic_state != XLOG_STATE_IOERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) * Mark all the incore logs IOERROR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) * From now on, no log flushes will result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) ic = iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) ic->ic_state = XLOG_STATE_IOERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) ic = ic->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) } while (ic != iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) * Return non-zero if the state transition has already happened.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) * This is called from xfs_force_shutdown, when we're forcibly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) * shutting down the filesystem, typically because of an IO error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) * Our main objectives here are to make sure that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) * a. if !logerror, flush the logs to disk. Anything modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) * after this is ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) * b. the filesystem gets marked 'SHUTDOWN' for all interested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) * parties to find out, 'atomically'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) * c. those who're sleeping on log reservations, pinned objects and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) * other resources get woken up, and be told the bad news.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) * d. nothing new gets queued up after (b) and (c) are done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) * Note: for the !logerror case we need to flush the regions held in memory out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) * to disk first. This needs to be done before the log is marked as shutdown,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) * otherwise the iclog writes will fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) xfs_log_force_umount(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) int logerror)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) struct xlog *log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) * If this happens during log recovery, don't worry about
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) * locking; the log isn't open for business yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) if (!log || log->l_flags & XLOG_ACTIVE_RECOVERY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) if (mp->m_sb_bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) mp->m_sb_bp->b_flags |= XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) * Somebody could've already done the hard work for us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) * No need to get locks for this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) ASSERT(XLOG_FORCED_SHUTDOWN(log));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) * Flush all the completed transactions to disk before marking the log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) * as being shut down. We need to do it in this order to ensure that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) * completed operations are safely on disk before we shut down, and that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) * we don't have to issue any buffer IO after the shutdown flags are set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) * to guarantee this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) if (!logerror)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) xfs_log_force(mp, XFS_LOG_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) * Mark the filesystem and the log as being in a shutdown state and wake
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) * everybody up to tell them the bad news.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) if (mp->m_sb_bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) mp->m_sb_bp->b_flags |= XBF_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) * Mark the log and the iclogs with IO error flags to prevent any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) * further log IO from being issued or completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) log->l_flags |= XLOG_IO_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) retval = xlog_state_ioerror(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) * We don't want anybody waiting for log reservations after this. That
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) * means we have to wake up everybody queued up on reserveq as well as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) * writeq. In addition, we make sure in xlog_{re}grant_log_space that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) * we don't enqueue anything once the SHUTDOWN flag is set, and this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) * action is protected by the grant locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) xlog_grant_head_wake_all(&log->l_reserve_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) xlog_grant_head_wake_all(&log->l_write_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) * as if the log writes were completed. The abort handling in the log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) * item committed callback functions will do this again under lock to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) * avoid races.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) spin_lock(&log->l_cilp->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) wake_up_all(&log->l_cilp->xc_commit_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) spin_unlock(&log->l_cilp->xc_push_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) xlog_state_do_callback(log);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) /* return non-zero if log IOERROR transition had already happened */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) }
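
/*
 * Illustrative sketch, not part of the original source: how a shutdown
 * caller uses the logerror distinction above. With log_io_error false the
 * log is synced before being marked failed; with it true the log contents
 * are not trusted and no flush is attempted. The helper is hypothetical.
 */
static void __maybe_unused
xfs_example_shutdown(
	struct xfs_mount	*mp,
	bool			log_io_error)
{
	if (xfs_log_force_umount(mp, log_io_error))
		return;	/* somebody else had already shut the log down */
}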
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) STATIC int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) xlog_iclogs_empty(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) struct xlog *log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) xlog_in_core_t *iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) iclog = log->l_iclog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) * Endianness does not matter here; zero is zero in any language.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) if (iclog->ic_header.h_num_logops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) iclog = iclog->ic_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) } while (iclog != log->l_iclog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) * Verify that an LSN stamped into a piece of metadata is valid. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) * intended for use in read verifiers on v5 superblocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) xfs_log_check_lsn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) struct xfs_mount *mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) xfs_lsn_t lsn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) bool valid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) * norecovery mode skips mount-time log processing and unconditionally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) * resets the in-core LSN. We can't validate in this mode, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) * modifications are not allowed anyway, so just return true.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) if (mp->m_flags & XFS_MOUNT_NORECOVERY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) * handled by recovery and thus safe to ignore here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) if (lsn == NULLCOMMITLSN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) valid = xlog_valid_lsn(mp->m_log, lsn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) /* warn the user about what's gone wrong before verifier failure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) if (!valid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) spin_lock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) xfs_warn(mp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) "Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) "Please unmount and run xfs_repair (>= v4.3) to resolve.",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) CYCLE_LSN(lsn), BLOCK_LSN(lsn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) log->l_curr_cycle, log->l_curr_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) spin_unlock(&log->l_icloglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) return valid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) }
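
/*
 * Illustrative sketch, not part of the original source: a v5 buffer read
 * verifier would reject metadata stamped with an LSN beyond the current
 * log head roughly like this. The helper name is hypothetical.
 */
static bool __maybe_unused
xfs_example_buf_lsn_ok(
	struct xfs_mount	*mp,
	__be64			disk_lsn)
{
	return xfs_log_check_lsn(mp, be64_to_cpu(disk_lsn));
}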
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) xfs_log_in_recovery(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) struct xfs_mount *mp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) struct xlog *log = mp->m_log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) return log->l_flags & XLOG_ACTIVE_RECOVERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) }