^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * linux/fs/jbd2/transaction.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Copyright 1998 Red Hat corp --- All Rights Reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Generic filesystem transaction handling code; part of the ext2fs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * journaling system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * This file manages transactions (compound commits managed by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * journaling code) and handles (individual atomic operations by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * filesystem).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/jbd2.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/timer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/hrtimer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/bug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include <trace/events/jbd2.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) static struct kmem_cache *transaction_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) int __init jbd2_journal_init_transaction_cache(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) J_ASSERT(!transaction_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) transaction_cache = kmem_cache_create("jbd2_transaction_s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) sizeof(transaction_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) if (!transaction_cache) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) pr_emerg("JBD2: failed to create transaction cache\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) void jbd2_journal_destroy_transaction_cache(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) kmem_cache_destroy(transaction_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) transaction_cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) void jbd2_journal_free_transaction(transaction_t *transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) if (unlikely(ZERO_OR_NULL_PTR(transaction)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) kmem_cache_free(transaction_cache, transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * Base amount of descriptor blocks we reserve for each transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) int tag_space = journal->j_blocksize - sizeof(journal_header_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) int tags_per_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /* Subtract UUID */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) tag_space -= 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) if (jbd2_journal_has_csum_v2or3(journal))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) tag_space -= sizeof(struct jbd2_journal_block_tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) /* Commit code leaves a slack space of 16 bytes at the end of block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * Revoke descriptors are accounted separately so we need to reserve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * space for commit block and normal transaction descriptor blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) tags_per_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * jbd2_get_transaction: obtain a new transaction_t object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * Simply initialise a new transaction. Initialize it in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * RUNNING state and add it to the current journal (which should not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) * have an existing running transaction: we only make a new transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) * once we have started to commit the old one).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * Preconditions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) * The journal MUST be locked. We don't perform atomic mallocs on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * new transaction and we can't block without protecting against other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * processes trying to touch the journal while it is in transition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) static void jbd2_get_transaction(journal_t *journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) transaction_t *transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) transaction->t_journal = journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) transaction->t_state = T_RUNNING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) transaction->t_start_time = ktime_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) transaction->t_tid = journal->j_transaction_sequence++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) transaction->t_expires = jiffies + journal->j_commit_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) spin_lock_init(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) atomic_set(&transaction->t_updates, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) atomic_set(&transaction->t_outstanding_credits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) jbd2_descriptor_blocks_per_trans(journal) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) atomic_read(&journal->j_reserved_credits));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) atomic_set(&transaction->t_outstanding_revokes, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) atomic_set(&transaction->t_handle_count, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) INIT_LIST_HEAD(&transaction->t_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) INIT_LIST_HEAD(&transaction->t_private_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) /* Set up the commit timer for the new transaction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) add_timer(&journal->j_commit_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) J_ASSERT(journal->j_running_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) journal->j_running_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) transaction->t_max_wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) transaction->t_start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) transaction->t_requested = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) * Handle management.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * A handle_t is an object which represents a single atomic update to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) * filesystem, and which tracks all of the modifications which form part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) * of that one update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) * Update transaction's maximum wait time, if debugging is enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * In order for t_max_wait to be reliable, it must be protected by a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * lock. But doing so will mean that start_this_handle() can not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) * run in parallel on SMP systems, which limits our scalability. So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * unless debugging is enabled, we no longer update t_max_wait, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * means that maximum wait time reported by the jbd2_run_stats
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * tracepoint will always be zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) static inline void update_t_max_wait(transaction_t *transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) unsigned long ts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) #ifdef CONFIG_JBD2_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) if (jbd2_journal_enable_debug &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) time_after(transaction->t_start, ts)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) ts = jbd2_time_diff(ts, transaction->t_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) spin_lock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) if (ts > transaction->t_max_wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) transaction->t_max_wait = ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) spin_unlock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * Wait until running transaction passes to T_FLUSH state and new transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * can thus be started. Also starts the commit if needed. The function expects
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) * running transaction to exist and releases j_state_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) static void wait_transaction_locked(journal_t *journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) __releases(journal->j_state_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) int need_to_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) tid_t tid = journal->j_running_transaction->t_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) need_to_start = !tid_geq(journal->j_commit_request, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) if (need_to_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) jbd2_log_start_commit(journal, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) jbd2_might_wait_for_commit(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) finish_wait(&journal->j_wait_transaction_locked, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * Wait until running transaction transitions from T_SWITCH to T_FLUSH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * state and new transaction can thus be started. The function releases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) * j_state_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) static void wait_transaction_switching(journal_t *journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) __releases(journal->j_state_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) if (WARN_ON(!journal->j_running_transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) journal->j_running_transaction->t_state != T_SWITCH)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * We don't call jbd2_might_wait_for_commit() here as there's no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) * waiting for outstanding handles happening anymore in T_SWITCH state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) * and handling of reserved handles actually relies on that for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) * correctness.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) finish_wait(&journal->j_wait_transaction_locked, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) static void sub_reserved_credits(journal_t *journal, int blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) atomic_sub(blocks, &journal->j_reserved_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) wake_up(&journal->j_wait_reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) * Wait until we can add credits for handle to the running transaction. Called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) * with j_state_lock held for reading. Returns 0 if handle joined the running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * caller must retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) static int add_transaction_credits(journal_t *journal, int blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) int rsv_blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) transaction_t *t = journal->j_running_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) int needed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) int total = blocks + rsv_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) * If the current transaction is locked down for commit, wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) * for the lock to be released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) if (t->t_state != T_RUNNING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) WARN_ON_ONCE(t->t_state >= T_FLUSH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) wait_transaction_locked(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * If there is not enough space left in the log to write all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * potential buffers requested by this operation, we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * stall pending a log checkpoint to free some more log space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) needed = atomic_add_return(total, &t->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) if (needed > journal->j_max_transaction_buffers) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * If the current transaction is already too large,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * then start to commit it: we can then go back and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * attach this handle to a new transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) atomic_sub(total, &t->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) * Is the number of reserved credits in the current transaction too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) * big to fit this handle? Wait until reserved credits are freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) if (atomic_read(&journal->j_reserved_credits) + total >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) journal->j_max_transaction_buffers) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) jbd2_might_wait_for_commit(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) wait_event(journal->j_wait_reserved,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) atomic_read(&journal->j_reserved_credits) + total <=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) journal->j_max_transaction_buffers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) wait_transaction_locked(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) * The commit code assumes that it can get enough log space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) * without forcing a checkpoint. This is *critical* for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) * correctness: a checkpoint of a buffer which is also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) * associated with a committing transaction creates a deadlock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) * so commit simply cannot force through checkpoints.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) * We must therefore ensure the necessary space in the journal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) * *before* starting to dirty potentially checkpointed buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) * in the new transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) atomic_sub(total, &t->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) jbd2_might_wait_for_commit(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) if (jbd2_log_space_left(journal) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) journal->j_max_transaction_buffers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) __jbd2_log_wait_for_space(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) /* No reservation? We are done... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) if (!rsv_blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) /* We allow at most half of a transaction to be reserved */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) if (needed > journal->j_max_transaction_buffers / 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) sub_reserved_credits(journal, rsv_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) atomic_sub(total, &t->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) jbd2_might_wait_for_commit(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) wait_event(journal->j_wait_reserved,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) atomic_read(&journal->j_reserved_credits) + rsv_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) <= journal->j_max_transaction_buffers / 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) * start_this_handle: Given a handle, deal with any locking or stalling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) * needed to make sure that there is enough journal space for the handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) * to begin. Attach the handle to a transaction and set up the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) * transaction's buffer credits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) static int start_this_handle(journal_t *journal, handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) transaction_t *transaction, *new_transaction = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) int blocks = handle->h_total_credits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) int rsv_blocks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) unsigned long ts = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) if (handle->h_rsv_handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) rsv_blocks = handle->h_rsv_handle->h_total_credits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * Limit the number of reserved credits to 1/2 of maximum transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) * size and limit the number of total credits to not exceed maximum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) * transaction size per operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) printk(KERN_ERR "JBD2: %s wants too many credits "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) "credits:%d rsv_credits:%d max:%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) current->comm, blocks, rsv_blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) journal->j_max_transaction_buffers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) alloc_transaction:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) * This check is racy but it is just an optimization of allocating new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) * transaction early if there are high chances we'll need it. If we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) * guess wrong, we'll retry or free unused transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) if (!data_race(journal->j_running_transaction)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) * If __GFP_FS is not present, then we may be being called from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) * inside the fs writeback layer, so we MUST NOT fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) if ((gfp_mask & __GFP_FS) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) gfp_mask |= __GFP_NOFAIL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) new_transaction = kmem_cache_zalloc(transaction_cache,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) if (!new_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) jbd_debug(3, "New handle %p going live.\n", handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) * We need to hold j_state_lock until t_updates has been incremented,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) * for proper journal barrier handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) read_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) BUG_ON(journal->j_flags & JBD2_UNMOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) if (is_journal_aborted(journal) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) jbd2_journal_free_transaction(new_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) * Wait on the journal's transaction barrier if necessary. Specifically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) * we allow reserved handles to proceed because otherwise commit could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) * deadlock on page writeback not being able to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) if (!handle->h_reserved && journal->j_barrier_count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) wait_event(journal->j_wait_transaction_locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) journal->j_barrier_count == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) if (!journal->j_running_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) if (!new_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) goto alloc_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) if (!journal->j_running_transaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) (handle->h_reserved || !journal->j_barrier_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) jbd2_get_transaction(journal, new_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) new_transaction = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) transaction = journal->j_running_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) if (!handle->h_reserved) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) /* We may have dropped j_state_lock - restart in that case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) if (add_transaction_credits(journal, blocks, rsv_blocks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) * We have handle reserved so we are allowed to join T_LOCKED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) * transaction and we don't have to check for transaction size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) * and journal space. But we still have to wait while running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) * transaction is being switched to a committing one as it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) * won't wait for any handles anymore.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) if (transaction->t_state == T_SWITCH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) wait_transaction_switching(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) sub_reserved_credits(journal, blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) handle->h_reserved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) /* OK, account for the buffers that this operation expects to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) * use and add the handle to the running transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) update_t_max_wait(transaction, ts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) handle->h_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) handle->h_requested_credits = blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) handle->h_revoke_credits_requested = handle->h_revoke_credits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) handle->h_start_jiffies = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) atomic_inc(&transaction->t_updates);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) atomic_inc(&transaction->t_handle_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) handle, blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) atomic_read(&transaction->t_outstanding_credits),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) jbd2_log_space_left(journal));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) current->journal_info = handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) jbd2_journal_free_transaction(new_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) * Ensure that no allocations done while the transaction is open are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) * going to recurse back to the fs layer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) handle->saved_alloc_context = memalloc_nofs_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) /* Allocate a new handle. This should probably be in a slab... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) static handle_t *new_handle(int nblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) if (!handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) handle->h_total_credits = nblocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) handle->h_ref = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) return handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) int revoke_records, gfp_t gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) unsigned int type, unsigned int line_no)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) handle_t *handle = journal_current_handle();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) if (!journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) return ERR_PTR(-EROFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) if (handle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) J_ASSERT(handle->h_transaction->t_journal == journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) handle->h_ref++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) return handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) nblocks += DIV_ROUND_UP(revoke_records,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) journal->j_revoke_records_per_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) handle = new_handle(nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) if (!handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) if (rsv_blocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) handle_t *rsv_handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) rsv_handle = new_handle(rsv_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) if (!rsv_handle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) jbd2_free_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) rsv_handle->h_reserved = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) rsv_handle->h_journal = journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) handle->h_rsv_handle = rsv_handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) handle->h_revoke_credits = revoke_records;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) err = start_this_handle(journal, handle, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) if (handle->h_rsv_handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) jbd2_free_handle(handle->h_rsv_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) jbd2_free_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) handle->h_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) handle->h_line_no = line_no;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) handle->h_transaction->t_tid, type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) line_no, nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) return handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) EXPORT_SYMBOL(jbd2__journal_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * jbd2_journal_start() - Obtain a new handle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) * @journal: Journal to start transaction on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * @nblocks: number of block buffer we might modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) * We make sure that the transaction can guarantee at least nblocks of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * modified buffers in the log. We block until the log can guarantee
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) * that much space. Additionally, if rsv_blocks > 0, we also create another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) * handle with rsv_blocks reserved blocks in the journal. This handle is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) * stored in h_rsv_handle. It is not attached to any particular transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) * and thus doesn't block transaction commit. If the caller uses this reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) * on the parent handle will dispose the reserved one. Reserved handle has to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) * be converted to a normal handle using jbd2_journal_start_reserved() before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) * it can be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) * Return a pointer to a newly allocated handle, or an ERR_PTR() value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) * on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) EXPORT_SYMBOL(jbd2_journal_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) journal_t *journal = handle->h_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) WARN_ON(!handle->h_reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) sub_reserved_credits(journal, handle->h_total_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) if (t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) void jbd2_journal_free_reserved(handle_t *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) journal_t *journal = handle->h_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) /* Get j_state_lock to pin running transaction if it exists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) read_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) jbd2_free_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) EXPORT_SYMBOL(jbd2_journal_free_reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) * jbd2_journal_start_reserved() - start reserved handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) * @handle: handle to start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) * @type: for handle statistics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * @line_no: for handle statistics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) * Start handle that has been previously reserved with jbd2_journal_reserve().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * This attaches @handle to the running transaction (or creates one if there's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * not transaction running). Unlike jbd2_journal_start() this function cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) * block on journal commit, checkpointing, or similar stuff. It can block on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) * memory allocation or frozen journal though.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) * Return 0 on success, non-zero on error - handle is freed in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) unsigned int line_no)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) journal_t *journal = handle->h_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) int ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) if (WARN_ON(!handle->h_reserved)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) /* Someone passed in normal handle? Just stop it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) jbd2_journal_stop(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) * Usefulness of mixing of reserved and unreserved handles is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) * questionable. So far nobody seems to need it so just error out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) if (WARN_ON(current->journal_info)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) jbd2_journal_free_reserved(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) handle->h_journal = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) * GFP_NOFS is here because callers are likely from writeback or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) * similarly constrained call sites
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) ret = start_this_handle(journal, handle, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) handle->h_journal = journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) jbd2_journal_free_reserved(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) handle->h_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) handle->h_line_no = line_no;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) handle->h_transaction->t_tid, type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) line_no, handle->h_total_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) EXPORT_SYMBOL(jbd2_journal_start_reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) * jbd2_journal_extend() - extend buffer credits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) * @handle: handle to 'extend'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) * @nblocks: nr blocks to try to extend by.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) * @revoke_records: number of revoke records to try to extend by.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) * Some transactions, such as large extends and truncates, can be done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) * atomically all at once or in several stages. The operation requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) * a credit for a number of buffer modifications in advance, but can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) * extend its credit if it needs more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) * jbd2_journal_extend tries to give the running handle more buffer credits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) * It does not guarantee that allocation - this is a best-effort only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) * The calling process MUST be able to deal cleanly with a failure to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) * extend here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) * Return 0 on success, non-zero on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) * return code < 0 implies an error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) * return code > 0 implies normal transaction-full status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) int result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) int wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) result = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) read_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) /* Don't extend a locked-down transaction! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) if (transaction->t_state != T_RUNNING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) jbd_debug(3, "denied handle %p %d blocks: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) "transaction not running\n", handle, nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) goto error_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) nblocks += DIV_ROUND_UP(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) handle->h_revoke_credits_requested + revoke_records,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) journal->j_revoke_records_per_block) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) DIV_ROUND_UP(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) handle->h_revoke_credits_requested,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) journal->j_revoke_records_per_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) spin_lock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) wanted = atomic_add_return(nblocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) &transaction->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) if (wanted > journal->j_max_transaction_buffers) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) jbd_debug(3, "denied handle %p %d blocks: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) "transaction too large\n", handle, nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) atomic_sub(nblocks, &transaction->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) transaction->t_tid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) handle->h_type, handle->h_line_no,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) handle->h_total_credits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) handle->h_total_credits += nblocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) handle->h_requested_credits += nblocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) handle->h_revoke_credits += revoke_records;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) handle->h_revoke_credits_requested += revoke_records;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) spin_unlock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) error_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) static void stop_this_handle(handle_t *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) journal_t *journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) int revokes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) J_ASSERT(journal_current_handle() == handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) J_ASSERT(atomic_read(&transaction->t_updates) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) current->journal_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) * Subtract necessary revoke descriptor blocks from handle credits. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) * take care to account only for revoke descriptor blocks the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) * transaction will really need as large sequences of transactions with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) * small numbers of revokes are relatively common.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) if (revokes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) int t_revokes, revoke_descriptors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) int rr_per_blk = journal->j_revoke_records_per_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) > handle->h_total_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) t_revokes = atomic_add_return(revokes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) &transaction->t_outstanding_revokes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) revoke_descriptors =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) DIV_ROUND_UP(t_revokes, rr_per_blk) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) handle->h_total_credits -= revoke_descriptors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) atomic_sub(handle->h_total_credits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) &transaction->t_outstanding_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) if (handle->h_rsv_handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) if (atomic_dec_and_test(&transaction->t_updates))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) wake_up(&journal->j_wait_updates);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) * Scope of the GFP_NOFS context is over here and so we can restore the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * original alloc context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) memalloc_nofs_restore(handle->saved_alloc_context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) * jbd2__journal_restart() - restart a handle .
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) * @handle: handle to restart
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) * @nblocks: nr credits requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) * @revoke_records: number of revoke record credits requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * @gfp_mask: memory allocation flags (for start_this_handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * Restart a handle for a multi-transaction filesystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * If the jbd2_journal_extend() call above fails to grant new buffer credits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) * to a running handle, a call to jbd2_journal_restart will commit the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) * handle's transaction so far and reattach the handle to a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) * transaction capable of guaranteeing the requested number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) * credits. We preserve reserved handle if there's any attached to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * passed in handle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) tid_t tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) int need_to_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) /* If we've had an abort of any type, don't even think about
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * actually doing the restart! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) tid = transaction->t_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * First unlink the handle from its current transaction, and start the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) * commit on that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) jbd_debug(2, "restarting handle %p\n", handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) stop_this_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) handle->h_transaction = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) * get rid of pointless j_state_lock traffic like this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) read_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) need_to_start = !tid_geq(journal->j_commit_request, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) if (need_to_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) jbd2_log_start_commit(journal, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) handle->h_total_credits = nblocks +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) DIV_ROUND_UP(revoke_records,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) journal->j_revoke_records_per_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) handle->h_revoke_credits = revoke_records;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) ret = start_this_handle(journal, handle, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) ret ? 0 : handle->h_transaction->t_tid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) handle->h_type, handle->h_line_no,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) handle->h_total_credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) EXPORT_SYMBOL(jbd2__journal_restart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) int jbd2_journal_restart(handle_t *handle, int nblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) EXPORT_SYMBOL(jbd2_journal_restart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) * jbd2_journal_lock_updates () - establish a transaction barrier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) * @journal: Journal to establish a barrier on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) * This locks out any further updates from being started, and blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) * until all existing updates have completed, returning only once the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * journal is in a quiescent state with no updates running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) * The journal lock should not be held on entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) void jbd2_journal_lock_updates(journal_t *journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) jbd2_might_wait_for_commit(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) ++journal->j_barrier_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) /* Wait until there are no reserved handles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) if (atomic_read(&journal->j_reserved_credits)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) wait_event(journal->j_wait_reserved,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) atomic_read(&journal->j_reserved_credits) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) /* Wait until there are no running updates */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) transaction_t *transaction = journal->j_running_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (!transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) spin_lock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) prepare_to_wait(&journal->j_wait_updates, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) if (!atomic_read(&transaction->t_updates)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) spin_unlock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) finish_wait(&journal->j_wait_updates, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) spin_unlock(&transaction->t_handle_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) finish_wait(&journal->j_wait_updates, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * We have now established a barrier against other normal updates, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) * we also need to barrier against other jbd2_journal_lock_updates() calls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) * to make sure that we serialise special journal-locked operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) mutex_lock(&journal->j_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * jbd2_journal_unlock_updates () - release barrier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * @journal: Journal to release the barrier on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * Release a transaction barrier obtained with jbd2_journal_lock_updates().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * Should be called without the journal lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) void jbd2_journal_unlock_updates (journal_t *journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) J_ASSERT(journal->j_barrier_count != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) mutex_unlock(&journal->j_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) --journal->j_barrier_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) wake_up(&journal->j_wait_transaction_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) static void warn_dirty_buffer(struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) printk(KERN_WARNING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) "There's a risk of filesystem corruption in case of system "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) "crash.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) bh->b_bdev, (unsigned long long)bh->b_blocknr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) static void jbd2_freeze_jh_data(struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) int offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) char *source;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) page = bh->b_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) offset = offset_in_page(bh->b_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) source = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) /* Fire data frozen trigger just before we copy the data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) memcpy(jh->b_frozen_data, source + offset, bh->b_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) kunmap_atomic(source);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * Now that the frozen data is saved off, we need to store any matching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * triggers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) jh->b_frozen_triggers = jh->b_triggers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) * If the buffer is already part of the current transaction, then there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) * is nothing we need to do. If it is already part of a prior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) * transaction which we are still committing to disk, then we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) * make sure that we do not overwrite the old copy: we do copy-out to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) * preserve the copy going to disk. We also account the buffer against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) * the handle's metadata buffer credits (unless the buffer is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) * part of the transaction, that is).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) *
 * @handle: handle requesting access; the buffer is attached to
 *	handle->h_transaction
 * @jh: journal head of the buffer being accessed
 * @force_copy: when non-zero and the buffer is owned by another transaction
 *	without frozen data yet, make the frozen copy even if the buffer is
 *	not on the BJ_Metadata list
 *
 * Returns 0 on success, or -EROFS if the handle is found aborted after the
 * buffer state has been examined.
 *
 * The function may sleep: it takes the buffer lock, may wait for BH_Shadow
 * to clear, and may allocate the frozen buffer with b_state_lock dropped.
 * After either of the latter two it restarts from the "repeat" label.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) do_get_write_access(handle_t *handle, struct journal_head *jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) int force_copy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) struct buffer_head *bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) int error;
	/* Allocated on demand; survives across "goto repeat" passes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) char *frozen_buffer = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) unsigned long start_lock, time_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) JBUFFER_TRACE(jh, "entry");
	/*
	 * Restart point: reached again whenever b_state_lock had to be
	 * dropped (shadow wait, frozen buffer allocation), so that all
	 * buffer and journal-head state is re-examined from scratch.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) /* @@@ Need to check for errors here at some point. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) start_lock = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) lock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) /* If it takes too long to lock the buffer, trace it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) time_lock = jbd2_time_diff(start_lock, jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (time_lock > HZ/10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) jiffies_to_msecs(time_lock));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) /* We now hold the buffer lock so it is safe to query the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * state. Is the buffer dirty?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) * If so, there are two possibilities. The buffer may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) * non-journaled, and undergoing a quite legitimate writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) * Otherwise, it is journaled, and we don't expect dirty buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * in that state (the buffers should be marked JBD_Dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) * instead.) So either the IO is being done under our own
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * control and this is a bug, or it's a third party IO such as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) * dump(8) (which may leave the buffer scheduled for read ---
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * ie. locked but not dirty) or tune2fs (which may actually have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) * the buffer dirtied, ugh.) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (buffer_dirty(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) * First question: is this buffer already part of the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * transaction or the existing committing transaction?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (jh->b_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) J_ASSERT_JH(jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) jh->b_transaction == transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) jh->b_transaction ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) journal->j_committing_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) if (jh->b_next_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) J_ASSERT_JH(jh, jh->b_next_transaction ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) warn_dirty_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * In any case we need to clean the dirty flag and we must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) * do it under the buffer lock to be sure we don't race
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * with running write-out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) JBUFFER_TRACE(jh, "Journalling dirty buffer");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) clear_buffer_dirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) set_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
	/* Only the buffer lock is released here; b_state_lock stays held. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) unlock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
	/*
	 * Preset the failure code so the aborted-handle exit below only has
	 * to drop the lock and jump; it is reset to 0 once the check passes.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) error = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (is_handle_aborted(handle)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * The buffer is already part of this transaction if b_transaction or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * b_next_transaction points to it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) if (jh->b_transaction == transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) jh->b_next_transaction == transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * this is the first time this transaction is touching this buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * reset the modified flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) jh->b_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * If the buffer is not journaled right now, we need to make sure it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * doesn't get written to disk before the caller actually commits the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * new data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) if (!jh->b_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) JBUFFER_TRACE(jh, "no transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) J_ASSERT_JH(jh, !jh->b_next_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) JBUFFER_TRACE(jh, "file as BJ_Reserved");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * Make sure all stores to jh (b_modified, b_frozen_data) are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * visible before attaching it to the running transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * Paired with barrier in jbd2_write_access_granted()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) * If there is already a copy-out version of this buffer, then we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) * need to make another one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) if (jh->b_frozen_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) JBUFFER_TRACE(jh, "has frozen data");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) goto attach_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) JBUFFER_TRACE(jh, "owned by older transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) * There is one case we have to be very careful about. If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * committing transaction is currently writing this buffer out to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * and has NOT made a copy-out, then we cannot modify the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * contents at all right now. The essence of copy-out is that it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * the extra copy, not the primary copy, which gets journaled. If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * primary copy is already going to disk then we cannot do copy-out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) * here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) if (buffer_shadow(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) JBUFFER_TRACE(jh, "on shadow: sleep");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * Only do the copy if the currently-owning transaction still needs it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * If buffer isn't on BJ_Metadata list, the committing transaction is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * past that stage (here we use the fact that BH_Shadow is set under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * bh_state lock together with refiling to BJ_Shadow list and at this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * point we know the buffer doesn't have BH_Shadow set).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * Subtle point, though: if this is a get_undo_access, then we will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * relying on the frozen_data to contain the new value of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * committed_data record after the transaction, so we HAVE to force the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * frozen_data copy in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) if (jh->b_jlist == BJ_Metadata || force_copy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) JBUFFER_TRACE(jh, "generate frozen data");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (!frozen_buffer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) JBUFFER_TRACE(jh, "allocate memory for buffer");
		/*
		 * The allocation happens with b_state_lock dropped, so
		 * everything is rechecked from "repeat"; the buffer is kept
		 * in frozen_buffer for the next pass.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) GFP_NOFS | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
	/* Hand the buffer over to jh; clearing the local avoids freeing it at "out". */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) jh->b_frozen_data = frozen_buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) frozen_buffer = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) jbd2_freeze_jh_data(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) attach_next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * Make sure all stores to jh (b_modified, b_frozen_data) are visible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) * before attaching it to the running transaction. Paired with barrier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) * in jbd2_write_access_granted()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) jh->b_next_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
	/* Success exit: b_state_lock is still held on every path reaching here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) * If we are about to journal a buffer, then any revoke pending on it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) * no longer valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) jbd2_journal_cancel_revoke(handle, jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
	/*
	 * Common exit, entered with b_state_lock already released. A
	 * non-NULL frozen_buffer here was allocated but never installed in
	 * jh->b_frozen_data, so it is freed.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) if (unlikely(frozen_buffer)) /* It's usually NULL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) jbd2_free(frozen_buffer, bh->b_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) JBUFFER_TRACE(jh, "exit");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) /* Fast check whether buffer is already attached to the required transaction */
/*
 * @handle: handle whose transaction (handle->h_transaction) is checked for
 * @bh: buffer to check
 * @undo: true when checking on behalf of undo access, which additionally
 *	requires jh->b_committed_data to be set
 *
 * Takes no locks other than the RCU read lock. Returns true only when the
 * buffer is not dirty, has a journal head, that journal head's b_transaction
 * or b_next_transaction equals handle->h_transaction, and the journal head
 * still points back at @bh after the memory barrier. Any other outcome
 * returns false, in which case jbd2_journal_get_write_access() falls back to
 * do_get_write_access().
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) bool undo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) /* Dirty buffers require special handling... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) if (buffer_dirty(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) * RCU protects us from dereferencing freed pages. So the checks we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) * are guaranteed not to oops. However the jh slab object can get freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) * & reallocated while we work with it. So we have to be careful. When
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * we see jh attached to the running transaction, we know it must stay
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * so until the transaction is committed. Thus jh won't be freed and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * will be attached to the same bh while we run. However it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * happen jh gets freed, reallocated, and attached to the transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * just after we get pointer to it from bh. So we have to be careful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * and recheck jh still belongs to our bh before we return success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (!buffer_jbd(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) /* This should be bh2jh() but that doesn't work with inline functions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) jh = READ_ONCE(bh->b_private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) if (!jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) /* For undo access buffer must have data copied */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (undo && !jh->b_committed_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) goto out;
	/* Not attached to our transaction in either slot: take the slow path. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) * There are two reasons for the barrier here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) * 1) Make sure to fetch b_bh after we did previous checks so that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) * detect when jh went through free, realloc, attach to transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) * while we were checking. Paired with implicit barrier in that path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) * 2) So that access to bh done after jbd2_write_access_granted()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) * doesn't get reordered and see inconsistent state of concurrent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) * do_get_write_access().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) smp_mb();
	/* Final recheck: jh must still be the journal head of this very bh. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) if (unlikely(jh->b_bh != bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) * jbd2_journal_get_write_access() - notify intent to modify a buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) * for metadata (not data) update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * @handle: transaction to add buffer modifications to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * @bh: bh to be used for metadata writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) * Returns: error code or 0 on success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) * In full data journalling mode the buffer may be of type BJ_AsyncData,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) * because we're ``write()ing`` a buffer which is also part of a shared mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) if (jbd2_write_access_granted(handle, bh, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) jh = jbd2_journal_add_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) /* We do not want to get caught playing with fields which the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * log thread also manipulates. Make sure that the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * completes any outstanding IO before proceeding. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) rc = do_get_write_access(handle, jh, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) * When the user wants to journal a newly created buffer_head
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * (ie. getblk() returned a new buffer and we are going to populate it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * manually rather than reading off disk), then we need to keep the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) * buffer_head locked until it has been completely filled with new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * data. In this case, we should be able to make the assertion that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * the bh is not already part of an existing transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * The buffer should already be locked by the caller by this point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) * There is no lock ranking violation: it was a newly created,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) * unlocked buffer beforehand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) * jbd2_journal_get_create_access () - notify intent to use newly created bh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) * @handle: transaction to add the new buffer to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) * @bh: new buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) * Call this if you create a new bh.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) struct journal_head *jh = jbd2_journal_add_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) jbd_debug(5, "journal_head %p\n", jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) err = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) JBUFFER_TRACE(jh, "entry");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) * The buffer may already belong to this transaction due to pre-zeroing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) * in the filesystem's new_block code. It may also be on the previous,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) * committing transaction's lists, but it HAS to be in Forget state in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) * that case: the transaction must have deleted the buffer for it to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) * reused here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) jh->b_transaction == NULL ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) (jh->b_transaction == journal->j_committing_transaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) jh->b_jlist == BJ_Forget)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) if (jh->b_transaction == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) * Previous jbd2_journal_forget() could have left the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) * with jbddirty bit set because it was being committed. When
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) * the commit finished, we've filed the buffer for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) * checkpointing and marked it dirty. Now we are reallocating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) * the buffer so the transaction freeing it must have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) * committed and so it's safe to clear the dirty bit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) clear_buffer_dirty(jh2bh(jh));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) /* first access by this transaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) jh->b_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) JBUFFER_TRACE(jh, "file as BJ_Reserved");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) } else if (jh->b_transaction == journal->j_committing_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) /* first access by this transaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) jh->b_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) JBUFFER_TRACE(jh, "set next transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) jh->b_next_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * akpm: I added this. ext3_alloc_branch can pick up new indirect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * blocks which contain freed but then revoked metadata. We need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * to cancel the revoke in case we end up freeing it yet again
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) * and the reallocating as data - this would cause a second revoke,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * which hits an assertion error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) JBUFFER_TRACE(jh, "cancelling revoke");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) jbd2_journal_cancel_revoke(handle, jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) * non-rewindable consequences
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) * @handle: transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) * @bh: buffer to undo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) * Sometimes there is a need to distinguish between metadata which has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) * been committed to disk and that which has not. The ext3fs code uses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) * this for freeing and allocating space, we have to make sure that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) * do not reuse freed space until the deallocation has been committed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) * since if we overwrote that space we would make the delete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) * un-rewindable in case of a crash.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) * To deal with that, jbd2_journal_get_undo_access requests write access to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) * buffer for parts of non-rewindable operations such as delete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) * operations on the bitmaps. The journaling code must keep a copy of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) * the buffer's contents prior to the undo_access call until such time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) * as we know that the buffer has definitely been committed to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) * We never need to know which transaction the committed data is part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) * of, buffers touched here are guaranteed to be dirtied later and so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * will be committed to a new transaction in due course, at which point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * we can discard the old committed data pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) * Returns error number or 0 on success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) char *committed_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (jbd2_write_access_granted(handle, bh, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) jh = jbd2_journal_add_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) JBUFFER_TRACE(jh, "entry");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) * Do this first --- it can drop the journal lock, so we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) * make sure that obtaining the committed_data is done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) * atomically wrt. completion of any outstanding commits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) err = do_get_write_access(handle, jh, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) if (!jh->b_committed_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) committed_data = jbd2_alloc(jh2bh(jh)->b_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) if (!jh->b_committed_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) /* Copy out the current buffer contents into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) * preserved, committed copy. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) JBUFFER_TRACE(jh, "generate b_committed data");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) if (!committed_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) jh->b_committed_data = committed_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) committed_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) if (unlikely(committed_data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) jbd2_free(committed_data, bh->b_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) * jbd2_journal_set_triggers() - Add triggers for commit writeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) * @bh: buffer to trigger on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * Set any triggers on this journal_head. This is always safe, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * triggers for a committing buffer will be saved off, and triggers for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * a running transaction will match the buffer in that transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) * Call with NULL to clear the triggers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) void jbd2_journal_set_triggers(struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) struct jbd2_buffer_trigger_type *type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) if (WARN_ON(!jh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) jh->b_triggers = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) struct jbd2_buffer_trigger_type *triggers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) if (!triggers || !triggers->t_frozen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) void jbd2_buffer_abort_trigger(struct journal_head *jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) struct jbd2_buffer_trigger_type *triggers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) if (!triggers || !triggers->t_abort)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) triggers->t_abort(triggers, jh2bh(jh));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) * @handle: transaction to add buffer to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * @bh: buffer to mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) * mark dirty metadata which needs to be journaled as part of the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) * transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) * The buffer must have previously had jbd2_journal_get_write_access()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) * called so that it has a valid journal_head attached to the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) * head.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * The buffer is placed on the transaction's metadata list and is marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * as belonging to the transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) * Returns error number or 0 on success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * Special care needs to be taken if the buffer already belongs to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * current committing transaction (in which case we should have frozen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * data present for that commit). In that case, we don't relink the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * buffer: that only gets done when the old transaction finally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * completes its commit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) if (!buffer_jbd(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) return -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * We don't grab jh reference here since the buffer must be part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * of the running transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) jh = bh2jh(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) jbd_debug(5, "journal_head %p\n", jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) JBUFFER_TRACE(jh, "entry");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) * This and the following assertions are unreliable since we may see jh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * in inconsistent state unless we grab bh_state lock. But this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * crucial to catch bugs so let's do a reliable check until the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) * lockless handling is fully proven.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) if (data_race(jh->b_transaction != transaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) jh->b_next_transaction != transaction)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) J_ASSERT_JH(jh, jh->b_transaction == transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) jh->b_next_transaction == transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) if (jh->b_modified == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) /* If it's in our transaction it must be in BJ_Metadata list. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) if (data_race(jh->b_transaction == transaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) jh->b_jlist != BJ_Metadata)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (jh->b_transaction == transaction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) jh->b_jlist != BJ_Metadata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) pr_err("JBD2: assertion failure: h_type=%u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) "h_line_no=%u block_no=%llu jlist=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) handle->h_type, handle->h_line_no,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) (unsigned long long) bh->b_blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) jh->b_jlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) J_ASSERT_JH(jh, jh->b_transaction != transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) jh->b_jlist == BJ_Metadata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (jh->b_modified == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) * This buffer's got modified and becoming part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * of the transaction. This needs to be done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * once a transaction -bzzz
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) goto out_unlock_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) jh->b_modified = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) handle->h_total_credits--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) * fastpath, to avoid expensive locking. If this buffer is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) * on the running transaction's metadata list there is nothing to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * Nobody can take it off again because there is a handle open.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * I _think_ we're OK here with SMP barriers - a mistaken decision will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) * result in this test being false, so we go in and take the locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) JBUFFER_TRACE(jh, "fastpath");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) if (unlikely(jh->b_transaction !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) journal->j_running_transaction)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) printk(KERN_ERR "JBD2: %s: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) "jh->b_transaction (%llu, %p, %u) != "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) "journal->j_running_transaction (%p, %u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) journal->j_devname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) (unsigned long long) bh->b_blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) jh->b_transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) jh->b_transaction ? jh->b_transaction->t_tid : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) journal->j_running_transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) journal->j_running_transaction ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) journal->j_running_transaction->t_tid : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) goto out_unlock_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) set_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * Metadata already on the current transaction list doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) * need to be filed. Metadata on another transaction's list must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) * be committing, and will be refiled once the commit completes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) * leave it alone for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (jh->b_transaction != transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) JBUFFER_TRACE(jh, "already on other transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) if (unlikely(((jh->b_transaction !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) journal->j_committing_transaction)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) (jh->b_next_transaction != transaction))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) "bad jh for block %llu: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) "transaction (%p, %u), "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) "jh->b_transaction (%p, %u), "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) "jh->b_next_transaction (%p, %u), jlist %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) journal->j_devname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) (unsigned long long) bh->b_blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) transaction, transaction->t_tid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) jh->b_transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) jh->b_transaction ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) jh->b_transaction->t_tid : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) jh->b_next_transaction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) jh->b_next_transaction ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) jh->b_next_transaction->t_tid : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) jh->b_jlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) /* And this case is illegal: we can't reuse another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * transaction's data buffer, ever. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) goto out_unlock_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) /* That test should have eliminated the following case: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) JBUFFER_TRACE(jh, "file as BJ_Metadata");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) out_unlock_bh:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) JBUFFER_TRACE(jh, "exit");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) * @handle: transaction handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) * @bh: bh to 'forget'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) * We can only do the bforget if there are no commits pending against the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) * buffer. If the buffer is dirty in the current running transaction we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) * can safely unlink it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * bh may not be a journalled buffer at all - it may be a non-JBD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * buffer which came off the hashtable. Check for this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * Decrements bh->b_count by one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) * Allow this call even if the handle has aborted --- it may be part of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) * the caller's cleanup after an abort.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) int drop_reserve = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) int was_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) BUFFER_TRACE(bh, "entry");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) jh = jbd2_journal_grab_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) if (!jh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) __bforget(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) /* Critical error: attempting to delete a bitmap buffer, maybe?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) * Don't do any jbd operations, and return an error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) if (!J_EXPECT_JH(jh, !jh->b_committed_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) "inconsistent data on disk")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) /* keep track of whether or not this transaction modified us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) was_modified = jh->b_modified;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) * The buffer's going from the transaction, we must drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) * all references -bzzz
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) jh->b_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) if (jh->b_transaction == transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) J_ASSERT_JH(jh, !jh->b_frozen_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) /* If we are forgetting a buffer which is already part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) * of this transaction, then we can just drop it from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) * the transaction immediately. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) clear_buffer_dirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) clear_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) * we only want to drop a reference if this transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * modified the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) if (was_modified)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) drop_reserve = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) * We are no longer going to journal this buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) * However, the commit of this transaction is still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) * important to the buffer: the delete that we are now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) * processing might obsolete an old log entry, so by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) * committing, we can satisfy the buffer's checkpoint.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) * So, if we have a checkpoint on the buffer, we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * now refile the buffer on our BJ_Forget list so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) * we know to remove the checkpoint after we commit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) if (jh->b_cp_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) __jbd2_journal_temp_unlink_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) __jbd2_journal_unfile_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) } else if (jh->b_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) J_ASSERT_JH(jh, (jh->b_transaction ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) journal->j_committing_transaction));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) /* However, if the buffer is still owned by a prior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) * (committing) transaction, we can't drop it yet... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) JBUFFER_TRACE(jh, "belongs to older transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /* ... but we CAN drop it from the new transaction by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) * marking the buffer as freed and setting b_next_transaction to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) * the new transaction, so that not only does the commit code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) * know it should clear dirty bits when it is done with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) * buffer, but also the buffer can be checkpointed only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * after the new transaction commits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) set_buffer_freed(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) if (!jh->b_next_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) jh->b_next_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) J_ASSERT(jh->b_next_transaction == transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * only give back the reserved credit if this transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) * modified the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) if (was_modified)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) drop_reserve = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) * Finally, if the buffer does not belong to any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) * transaction, we can just drop it now if it has no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) * checkpoint.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) if (!jh->b_cp_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) JBUFFER_TRACE(jh, "belongs to none transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * Otherwise, if the buffer has been written to disk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * it is safe to remove the checkpoint and drop it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) if (!buffer_dirty(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) __jbd2_journal_remove_checkpoint(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) * The buffer is still not written to disk, we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) * attach this buffer to current transaction so that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) * buffer can be checkpointed only after the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) * transaction commits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) clear_buffer_dirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) drop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) __brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) if (drop_reserve) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) /* no need to reserve log space for this block -bzzz */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) handle->h_total_credits++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) * jbd2_journal_stop() - complete a transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) * @handle: transaction to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * All done for a particular handle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) * There is not much action needed here. We just return any remaining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) * buffer credits to the transaction and remove the handle. The only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) * complication is that we need to start a commit operation if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) * filesystem is marked for synchronous update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) * jbd2_journal_stop itself will not usually return an error, but it may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) * do so in unusual circumstances. In particular, expect it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * return -EIO if a jbd2_journal_abort has been executed since the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * transaction began.
*
* If the handle is still referenced elsewhere (h_ref stays above zero) only
* the reference count is dropped; the handle is neither stopped nor freed.
*
* Return: 0 on success, or -EIO if the handle has been aborted. When a
* synchronous handle waits for its commit, the result of
* jbd2_log_wait_commit() is returned instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) int jbd2_journal_stop(handle_t *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) int err = 0, wait_for_commit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) tid_t tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) pid_t pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793)
/* Other references remain: just drop ours and report the abort state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) if (--handle->h_ref > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) handle->h_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) if (!transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) * Handle is already detached from the transaction so there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) * nothing to do other than free the handle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) memalloc_nofs_restore(handle->saved_alloc_context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) goto free_and_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) }
/*
* Cache the journal and tid now: both are still needed after
* stop_this_handle(), when the transaction must no longer be dereferenced.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) tid = transaction->t_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) jbd_debug(4, "Handle %p going down\n", handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) tid, handle->h_type, handle->h_line_no,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) jiffies - handle->h_start_jiffies,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) handle->h_sync, handle->h_requested_credits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) (handle->h_requested_credits -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) handle->h_total_credits));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * Implement synchronous transaction batching. If the handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * was synchronous, don't force a commit immediately. Let's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) * yield and let another thread piggyback onto this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * transaction. Keep doing that while new threads continue to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) * arrive. It doesn't cost much - we're about to run a commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) * and sleep on IO anyway. Speeds up many-threaded, many-dir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) * operations by 30x or more...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) * We try and optimize the sleep time against what the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) * underlying disk can do, instead of having a static sleep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) * time. This is useful for the case where our storage is so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) * fast that it is more optimal to go ahead and force a flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) * and wait for the transaction to be committed than it is to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) * wait for an arbitrary amount of time for new writers to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) * join the transaction. We achieve this by measuring how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) * long it takes to commit a transaction, and compare it with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * how long this transaction has been running, and if run time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * < commit time then we sleep for the delta and commit. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * greatly helps super fast disks that would see slowdowns as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) * more threads started doing fsyncs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * But don't do this if this process was the most recent one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * to perform a synchronous write. We do this to detect the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) * case where a single process is doing a stream of sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) * writes. No point in waiting for joiners in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) * Setting max_batch_time to 0 disables this completely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) pid = current->pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) if (handle->h_sync && journal->j_last_sync_writer != pid &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) journal->j_max_batch_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) u64 commit_time, trans_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) journal->j_last_sync_writer = pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) read_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) commit_time = journal->j_average_commit_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) trans_time = ktime_to_ns(ktime_sub(ktime_get(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) transaction->t_start_time));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
/*
* Clamp the commit time (nanoseconds) between the min and max batch
* times, which are scaled by 1000 first (presumably microseconds to
* nanoseconds). Scale in 64 bits: multiplying in the narrower type of
* the batch-time fields could wrap before max_t()/min_t() widen the
* product to u64.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) commit_time = max_t(u64, commit_time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 1000ULL * journal->j_min_batch_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) commit_time = min_t(u64, commit_time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 1000ULL * journal->j_max_batch_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
/* Transaction is younger than a commit takes: wait for joiners. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) if (trans_time < commit_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) ktime_t expires = ktime_add_ns(ktime_get(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) commit_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) set_current_state(TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) if (handle->h_sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) transaction->t_synchronous_commit = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) * If the handle is marked SYNC, we need to set another commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) * going! We also want to force a commit if the transaction is too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) * old now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) if (handle->h_sync ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) time_after_eq(jiffies, transaction->t_expires)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) /* Do this even for aborted journals: an abort still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * completes the commit thread, it just doesn't write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * anything to disk. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) jbd_debug(2, "transaction too old, requesting commit for "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) "handle %p\n", handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) /* This is non-blocking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) jbd2_log_start_commit(journal, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) * Special case: JBD2_SYNC synchronous updates require us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) * to wait for the commit to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) if (handle->h_sync && !(current->flags & PF_MEMALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) wait_for_commit = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * Once stop_this_handle() drops t_updates, the transaction could start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) * committing on us and eventually disappear. So we must not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) * dereference transaction pointer again after calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * stop_this_handle().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) stop_this_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913)
/* The wait result replaces any earlier -EIO recorded in err. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) if (wait_for_commit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) err = jbd2_log_wait_commit(journal, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) free_and_exit:
/* Free the attached reserved handle, if any, then the handle itself. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) if (handle->h_rsv_handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) jbd2_free_handle(handle->h_rsv_handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) jbd2_free_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) * List management code snippets: various functions for manipulating the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) * transaction buffer lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) * Append a buffer to a transaction list, given the transaction's list head
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) * pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) *
* The list is circular and doubly linked through b_tnext/b_tprev. On an
* empty list jh becomes the head and points at itself; otherwise jh is
* linked in between the current last element and the head, and *list is
* left unchanged.
*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) * j_list_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) * jh->b_state_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) if (!*list) {
/* First element: a one-node ring that is also the list head. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) jh->b_tnext = jh->b_tprev = jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) *list = jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) /* Insert at the tail of the list to preserve order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) struct journal_head *first = *list, *last = first->b_tprev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) jh->b_tprev = last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) jh->b_tnext = first;
/* Close the ring: the old tail and the head now both point at jh. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) last->b_tnext = first->b_tprev = jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) * Remove a buffer from a transaction list, given the transaction's list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) * head pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) *
* If jh is the current head, the head moves on to jh->b_tnext, or becomes
* NULL when jh was the only element. jh's neighbours are then linked to
* each other; jh's own b_tnext/b_tprev are not reset here.
*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) * Called with j_list_lock held, and the journal may not be locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) * jh->b_state_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) if (*list == jh) {
/* Removing the head: advance it, or empty the list if jh was alone. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) *list = jh->b_tnext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) if (*list == jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) *list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) }
/* Splice jh out of the ring by joining its two neighbours. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) jh->b_tprev->b_tnext = jh->b_tnext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) jh->b_tnext->b_tprev = jh->b_tprev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) * Remove a buffer from the appropriate transaction list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) * Note that this function can *change* the value of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) * jh->b_transaction->t_buffers, t_forget, t_shadow_list or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) * t_reserved_list. If the caller is holding onto a copy of one of these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) * pointers, it could go bad. Generally the caller needs to re-read the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) * pointer from the transaction_t.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) *
* The buffer is only taken off its list: b_jlist becomes BJ_None, but
* jh->b_transaction is left untouched. A buffer that is already on no list
* (BJ_None) is returned unchanged.
*
* When the buffer was on a list, its jbddirty bit is cleared: it is dropped
* if the journal has been aborted, and otherwise turned into an ordinary
* dirty bit via mark_buffer_dirty().
*
* jh->b_state_lock must be held (checked with lockdep).
*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) * Called under j_list_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) struct journal_head **list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) transaction_t *transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) lockdep_assert_held(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) transaction = jh->b_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) if (transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) assert_spin_locked(&transaction->t_journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997)
/* A buffer that sits on any list must belong to a transaction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) if (jh->b_jlist != BJ_None)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) J_ASSERT_JH(jh, transaction != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
/* Pick the list head that matches the list the buffer is filed on. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) switch (jh->b_jlist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) case BJ_None:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) case BJ_Metadata:
/* Only the metadata list keeps a buffer count. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) transaction->t_nr_buffers--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) list = &transaction->t_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) case BJ_Forget:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) list = &transaction->t_forget;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) case BJ_Shadow:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) list = &transaction->t_shadow_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) case BJ_Reserved:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) list = &transaction->t_reserved_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020)
/*
* NOTE(review): list stays NULL if b_jlist ever holds a value below
* BJ_Types that has no case above - confirm the enum has no such value.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) __blist_del_buffer(list, jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) jh->b_jlist = BJ_None;
/* Aborted journal: drop jbddirty without dirtying the buffer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (transaction && is_journal_aborted(transaction->t_journal))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) clear_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) else if (test_clear_buffer_jbddirty(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) mark_buffer_dirty(bh); /* Expose it to the VM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) * Remove buffer from all transactions. The caller is responsible for dropping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) * the jh reference that belonged to the transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) *
* The buffer must currently belong to a transaction (b_transaction set) and
* must not be queued for a following one (b_next_transaction NULL); both
* conditions are asserted. It is unlinked from its transaction list and
* b_transaction is then cleared.
*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) * Called with jh->b_state_lock and j_list_lock held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) J_ASSERT_JH(jh, jh->b_transaction != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039)
/* Unlink first: the unlink helper still reads jh->b_transaction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) __jbd2_journal_temp_unlink_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) jh->b_transaction = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043)
/*
* Locked wrapper around __jbd2_journal_unfile_buffer().
*
* Takes jh->b_state_lock and then journal->j_list_lock, unfiles the buffer
* from its transaction, releases both locks in reverse order and finally
* puts one journal head reference. A buffer_head reference is held across
* the whole operation and released last.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) /* Get reference so that buffer cannot be freed before we unlock it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) get_bh(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) __jbd2_journal_unfile_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) spin_unlock(&jh->b_state_lock);
/* Drop the jh reference that belonged to the transaction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) __brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) * Called from jbd2_journal_try_to_free_buffers().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) *
* Detach the buffer from its checkpoint transaction, if it has one.
* Nothing is done when the buffer is locked or dirty, or when its journal
* head still belongs to a transaction (b_transaction or b_next_transaction
* set). This function does not free anything itself.
*
* Takes journal->j_list_lock internally.
*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) * Called under jh->b_state_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) jh = bh2jh(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070)
/* Buffers that are locked or dirty are left alone. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) if (buffer_locked(bh) || buffer_dirty(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
/* Still owned by a current or following transaction: leave it alone. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) if (jh->b_cp_transaction != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) /* written-back checkpointed metadata buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) JBUFFER_TRACE(jh, "remove from checkpoint list");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) __jbd2_journal_remove_checkpoint(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) * jbd2_journal_try_to_free_buffers() - try to free page buffers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) * @journal: journal for operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) * @page: to try and free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * For all the buffers on this page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) * if they are fully written out ordered data, move them onto BUF_CLEAN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) * so try_to_free_buffers() can reap them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) * This function returns non-zero if we wish try_to_free_buffers()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) * to be called. We do this if the page is releasable by try_to_free_buffers().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) * We also do it if the page has locked or dirty buffers and the caller wants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) * us to perform sync or async writeout.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) * This complicates JBD locking somewhat. We aren't protected by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) * BKL here. We wish to remove the buffer from its committing or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) * This may *change* the value of transaction_t->t_datalist, so anyone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) * who looks at t_datalist needs to lock against this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) * Even worse, someone may be doing a jbd2_journal_dirty_data on this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) * buffer. So we need to lock against that. jbd2_journal_dirty_data()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) * will come out of the lock with the buffer dirty, which makes it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * ineligible for release here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) * Who else is affected by this? hmm... Really the only contender
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) * is do_get_write_access() - it could be looking at the buffer while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) * journal_try_to_free_buffer() is changing its state. But that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) * cannot happen because we never reallocate freed data as metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) * while the data is part of a transaction. Yes?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) * Return 0 on failure, 1 on success
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) struct buffer_head *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) struct buffer_head *bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) bool has_write_io_error = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) J_ASSERT(PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) head = page_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) bh = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) * We take our own ref against the journal_head here to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) * having to add tons of locking around each instance of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) * jbd2_journal_put_journal_head().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) jh = jbd2_journal_grab_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) if (!jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) __journal_try_to_free_buffer(journal, bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) if (buffer_jbd(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) goto busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) * If we free a metadata buffer which has been failed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) * write out, the jbd2 checkpoint procedure will not detect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) * this failure and may lead to filesystem inconsistency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) * after cleanup journal tail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) if (buffer_write_io_error(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) pr_err("JBD2: Error while async write back metadata bh %llu.",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) (unsigned long long)bh->b_blocknr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) has_write_io_error = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) } while ((bh = bh->b_this_page) != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) ret = try_to_free_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) busy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) if (has_write_io_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) jbd2_journal_abort(journal, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) * This buffer is no longer needed. If it is on an older transaction's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) * checkpoint list we need to record it on this transaction's forget list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) * to pin this buffer (and hence its checkpointing transaction) down until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) * this transaction commits. If the buffer isn't on a checkpoint list, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * release it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * Returns non-zero if JBD no longer has an interest in the buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) * Called under j_list_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) * Called under jh->b_state_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) int may_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) if (jh->b_cp_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) JBUFFER_TRACE(jh, "on running+cp transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) __jbd2_journal_temp_unlink_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * We don't want to write the buffer anymore, clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * bit so that we don't confuse checks in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * __journal_file_buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) clear_buffer_dirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) may_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) JBUFFER_TRACE(jh, "on running transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) __jbd2_journal_unfile_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) return may_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) * jbd2_journal_invalidatepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) * This code is tricky. It has a number of cases to deal with.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) * There are two invariants which this code relies on:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) * i_size must be updated on disk before we start calling invalidatepage on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) * data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) * This is done in ext3 by defining an ext3_setattr method which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) * updates i_size before truncate gets going. By maintaining this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * invariant, we can be sure that it is safe to throw away any buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * attached to the current transaction: once the transaction commits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * we know that the data will not be needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) * Note however that we can *not* throw away data belonging to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) * previous, committing transaction!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) * Any disk blocks which *are* part of the previous, committing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) * transaction (and which therefore cannot be discarded immediately) are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) * not going to be reused in the new running transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) * The bitmap committed_data images guarantee this: any block which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) * allocated in one transaction and removed in the next will be marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) * as in-use in the committed_data bitmap, so cannot be reused until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) * the next transaction to delete the block commits. This means that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) * leaving committing buffers dirty is quite safe: the disk blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) * cannot be reallocated to a different file and so buffer aliasing is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) * not possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) * The above applies mainly to ordered data mode. In writeback mode we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) * don't make guarantees about the order in which data hits disk --- in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) * particular we don't guarantee that new dirty data is flushed before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) * transaction commit --- so it is always safe just to discard data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) * immediately in that mode. --sct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) * The journal_unmap_buffer helper function returns zero if the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) * concerned remains pinned as an anonymous buffer belonging to an older
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) * transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) * We're outside-transaction here. Either or both of j_running_transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) * and j_committing_transaction may be NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) int partial_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) transaction_t *transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) struct journal_head *jh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) int may_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) BUFFER_TRACE(bh, "entry");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * It is safe to proceed here without the j_list_lock because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) * buffers cannot be stolen by try_to_free_buffers as long as we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * holding the page lock. --sct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) jh = jbd2_journal_grab_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) if (!jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) goto zap_buffer_unlocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) /* OK, we have data buffer in journaled mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) * We cannot remove the buffer from checkpoint lists until the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) * transaction adding inode to orphan list (let's call it T)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) * is committed. Otherwise if the transaction changing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) * buffer would be cleaned from the journal before T is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) * committed, a crash will cause that the correct contents of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) * the buffer will be lost. On the other hand we have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) * clear the buffer dirty bit at latest at the moment when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) * transaction marking the buffer as freed in the filesystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) * structures is committed because from that moment on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) * block can be reallocated and used by a different page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) * Since the block hasn't been freed yet but the inode has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * already been added to orphan list, it is safe for us to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * the buffer to BJ_Forget list of the newest transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) * Also we have to clear buffer_mapped flag of a truncated buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) * because the buffer_head may be attached to the page straddling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) * i_size (can happen only when blocksize < pagesize) and thus the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) * buffer_head can be reused when the file is extended again. So we end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) * up keeping around invalidated buffers attached to transactions'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) * BJ_Forget list just to stop checkpointing code from cleaning up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) * the transaction this buffer was modified in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) transaction = jh->b_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) if (transaction == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) /* First case: not on any transaction. If it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) * has no checkpoint link, then we can zap it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) * it's a writeback-mode buffer so we don't care
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) * if it hits disk safely. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) if (!jh->b_cp_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) JBUFFER_TRACE(jh, "not on any transaction: zap");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) goto zap_buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (!buffer_dirty(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) /* bdflush has written it. We can drop it now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) __jbd2_journal_remove_checkpoint(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) goto zap_buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) /* OK, it must be in the journal but still not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) * written fully to disk: it's metadata or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) * journaled data... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) if (journal->j_running_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) /* ... and once the current transaction has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) * committed, the buffer won't be needed any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) * longer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) may_free = __dispose_buffer(jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) journal->j_running_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) goto zap_buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) /* There is no currently-running transaction. So the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) * orphan record which we wrote for this file must have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) * passed into commit. We must attach this buffer to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) * the committing transaction, if it exists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) if (journal->j_committing_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) JBUFFER_TRACE(jh, "give to committing trans");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) may_free = __dispose_buffer(jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) journal->j_committing_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) goto zap_buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) /* The orphan record's transaction has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) * committed. We can cleanse this buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) clear_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) __jbd2_journal_remove_checkpoint(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) goto zap_buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) } else if (transaction == journal->j_committing_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) JBUFFER_TRACE(jh, "on committing transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) * The buffer is committing, we simply cannot touch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) * it. If the page is straddling i_size we have to wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) * for commit and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) if (partial_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) * OK, buffer won't be reachable after truncate. We just clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) * b_modified to not confuse transaction credit accounting, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) * set j_next_transaction to the running transaction (if there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) * is one) and mark buffer as freed so that commit code knows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) * it should clear dirty bits when it is done with the buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) set_buffer_freed(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) if (journal->j_running_transaction && buffer_jbddirty(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) jh->b_next_transaction = journal->j_running_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) jh->b_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) /* Good, the buffer belongs to the running transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) * We are writing our own transaction's data, not any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) * previous one's, so it is safe to throw it away
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) * (remember that we expect the filesystem to have set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) * i_size already for this truncate so recovery will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) * expose the disk blocks we are discarding here.) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) JBUFFER_TRACE(jh, "on running transaction");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) may_free = __dispose_buffer(jh, transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) zap_buffer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) * This is tricky. Although the buffer is truncated, it may be reused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) * if blocksize < pagesize and it is attached to the page straddling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) * EOF. Since the buffer might have been added to BJ_Forget list of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) * running transaction, journal_get_write_access() won't clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) * b_modified and credit accounting gets confused. So clear b_modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) * here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) jh->b_modified = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) zap_buffer_unlocked:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) clear_buffer_dirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) J_ASSERT_BH(bh, !buffer_jbddirty(bh));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) clear_buffer_mapped(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) clear_buffer_req(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) clear_buffer_new(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) clear_buffer_delay(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) clear_buffer_unwritten(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) bh->b_bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) return may_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) * jbd2_journal_invalidatepage()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) * @journal: journal to use for flush...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) * @page: page to flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * @offset: start of the range to invalidate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) * @length: length of the range to invalidate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) * Reap page buffers containing data after in the specified range in page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) * Can return -EBUSY if buffers are part of the committing transaction and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) * the page is straddling i_size. Caller then has to wait for current commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) * and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) int jbd2_journal_invalidatepage(journal_t *journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) unsigned int offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) unsigned int length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) struct buffer_head *head, *bh, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) unsigned int stop = offset + length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) unsigned int curr_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) int partial_page = (offset || length < PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) int may_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) if (!PageLocked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) if (!page_has_buffers(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) BUG_ON(stop > PAGE_SIZE || stop < length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) /* We will potentially be playing with lists other than just the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) * data lists (especially for journaled data mode), so be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) * cautious in our locking. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) head = bh = page_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) unsigned int next_off = curr_off + bh->b_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) next = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) if (next_off > stop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) if (offset <= curr_off) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) /* This block is wholly outside the truncation point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) lock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) ret = journal_unmap_buffer(journal, bh, partial_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) unlock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) may_free &= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) curr_off = next_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) bh = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) } while (bh != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) if (!partial_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) if (may_free && try_to_free_buffers(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) J_ASSERT(!page_has_buffers(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) * File a buffer on the given transaction list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) void __jbd2_journal_file_buffer(struct journal_head *jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) transaction_t *transaction, int jlist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) struct journal_head **list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) int was_dirty = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) lockdep_assert_held(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) assert_spin_locked(&transaction->t_journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) J_ASSERT_JH(jh, jh->b_transaction == transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) jh->b_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) if (jh->b_transaction && jh->b_jlist == jlist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) jlist == BJ_Shadow || jlist == BJ_Forget) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) * For metadata buffers, we track dirty bit in buffer_jbddirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) * instead of buffer_dirty. We should not see a dirty bit set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) * here because we clear it in do_get_write_access but e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) * tune2fs can modify the sb and set the dirty bit at any time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) * so we try to gracefully handle that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) if (buffer_dirty(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) warn_dirty_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) if (test_clear_buffer_dirty(bh) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) test_clear_buffer_jbddirty(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) was_dirty = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) if (jh->b_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) __jbd2_journal_temp_unlink_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) jbd2_journal_grab_journal_head(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) jh->b_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) switch (jlist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) case BJ_None:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) J_ASSERT_JH(jh, !jh->b_committed_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) J_ASSERT_JH(jh, !jh->b_frozen_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) case BJ_Metadata:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) transaction->t_nr_buffers++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) list = &transaction->t_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) case BJ_Forget:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) list = &transaction->t_forget;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) case BJ_Shadow:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) list = &transaction->t_shadow_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) case BJ_Reserved:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) list = &transaction->t_reserved_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) __blist_add_buffer(list, jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) jh->b_jlist = jlist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) if (was_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) set_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) void jbd2_journal_file_buffer(struct journal_head *jh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) transaction_t *transaction, int jlist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) spin_lock(&transaction->t_journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) __jbd2_journal_file_buffer(jh, transaction, jlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) spin_unlock(&transaction->t_journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) * Remove a buffer from its current buffer list in preparation for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) * dropping it from its current transaction entirely. If the buffer has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) * already started to be used by a subsequent transaction, refile the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) * buffer on that transaction's metadata list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) * Called under j_list_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) * Called under jh->b_state_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) * When this function returns true, there's no next transaction to refile to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) * and the caller has to drop jh reference through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) * jbd2_journal_put_journal_head().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) bool __jbd2_journal_refile_buffer(struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) int was_dirty, jlist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) struct buffer_head *bh = jh2bh(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) lockdep_assert_held(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) if (jh->b_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) /* If the buffer is now unused, just drop it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) if (jh->b_next_transaction == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) __jbd2_journal_unfile_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * It has been modified by a later transaction: add it to the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) * transaction's metadata list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) was_dirty = test_clear_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) __jbd2_journal_temp_unlink_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) * b_transaction must be set, otherwise the new b_transaction won't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) * be holding jh reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) J_ASSERT_JH(jh, jh->b_transaction != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * We set b_transaction here because b_next_transaction will inherit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) * our jh reference and thus __jbd2_journal_file_buffer() must not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) * take a new one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) WRITE_ONCE(jh->b_next_transaction, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) if (buffer_freed(bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) jlist = BJ_Forget;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) else if (jh->b_modified)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) jlist = BJ_Metadata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) jlist = BJ_Reserved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) if (was_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) set_buffer_jbddirty(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) * __jbd2_journal_refile_buffer() with necessary locking added. We take our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) * bh reference so that we can safely unlock bh.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) * The jh and bh may be freed by this call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) bool drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) spin_lock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) drop = __jbd2_journal_refile_buffer(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) spin_unlock(&jh->b_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) if (drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) jbd2_journal_put_journal_head(jh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) * File inode in the inode list of the handle's transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) unsigned long flags, loff_t start_byte, loff_t end_byte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) transaction_t *transaction = handle->h_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) journal_t *journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) if (is_handle_aborted(handle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) journal = transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) transaction->t_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) jinode->i_flags |= flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) if (jinode->i_dirty_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) jinode->i_dirty_start = start_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) jinode->i_dirty_end = end_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) /* Is inode already attached where we need it? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) if (jinode->i_transaction == transaction ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) jinode->i_next_transaction == transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) * We only ever set this variable to 1 so the test is safe. Since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) * t_need_data_flush is likely to be set, we do the test to save some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) * cacheline bouncing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) if (!transaction->t_need_data_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) transaction->t_need_data_flush = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) /* On some different transaction's list - should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) * the committing one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) if (jinode->i_transaction) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) J_ASSERT(jinode->i_next_transaction == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) J_ASSERT(jinode->i_transaction ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) journal->j_committing_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) jinode->i_next_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) /* Not on any transaction list... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) J_ASSERT(!jinode->i_next_transaction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) jinode->i_transaction = transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) list_add(&jinode->i_list, &transaction->t_inode_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) int jbd2_journal_inode_ranged_write(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) return jbd2_journal_file_inode(handle, jinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) start_byte + length - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) loff_t start_byte, loff_t length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) start_byte, start_byte + length - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) * File truncate and transaction commit interact with each other in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) * non-trivial way. If a transaction writing data block A is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) * committing, we cannot discard the data by truncate until we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) * written them. Otherwise if we crashed after the transaction with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) * write has committed but before the transaction with truncate has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) * committed, we could see stale data in block A. This function is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) * helper to solve this problem. It starts writeout of the truncated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) * part in case it is in the committing transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * Filesystem code must call this function when inode is journaled in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * ordered mode before truncation happens and after the inode has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) * placed on orphan list with the new inode size. The second condition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) * avoids the race that someone writes new data and we start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) * committing the transaction after this function has been called but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) * before a transaction for truncate is started (and furthermore it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * allows us to optimize the case where the addition to orphan list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) * happens in the same transaction as write --- we don't have to write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) * any data in such case).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) int jbd2_journal_begin_ordered_truncate(journal_t *journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) struct jbd2_inode *jinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) loff_t new_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) transaction_t *inode_trans, *commit_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) /* This is a quick check to avoid locking if not necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) if (!jinode->i_transaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) /* Locks are here just to force reading of recent values, it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) * enough that the transaction was not committing before we started
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) * a transaction adding the inode to orphan list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) read_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) commit_trans = journal->j_committing_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) read_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) spin_lock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) inode_trans = jinode->i_transaction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) spin_unlock(&journal->j_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) if (inode_trans == commit_trans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) new_size, LLONG_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) jbd2_journal_abort(journal, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) }