^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* -*- mode: c; c-basic-offset: 8; -*-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * vim: noexpandtab sw=8 ts=8 sts=0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * journal.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Defines functions of journalling api
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright (C) 2003, 2004 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/random.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <cluster/masklog.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include "ocfs2.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include "alloc.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include "blockcheck.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include "dir.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include "dlmglue.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "extent_map.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include "heartbeat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include "inode.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include "journal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include "localalloc.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include "slot_map.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #include "sysfile.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include "uptodate.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include "quota.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #include "file.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #include "namei.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #include "buffer_head_io.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #include "ocfs2_trace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) DEFINE_SPINLOCK(trans_inc_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) static int ocfs2_force_read_journal(struct inode *inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) static int ocfs2_recover_node(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) int node_num, int slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) static int __ocfs2_recovery_thread(void *arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) static int ocfs2_commit_cache(struct ocfs2_super *osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) int dirty, int replayed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) static int ocfs2_trylock_journal(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) int slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) static int ocfs2_recover_orphans(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) int slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) enum ocfs2_orphan_reco_type orphan_reco_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) static int ocfs2_commit_thread(void *arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) int slot_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) struct ocfs2_dinode *la_dinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) struct ocfs2_dinode *tl_dinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) struct ocfs2_quota_recovery *qrec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) enum ocfs2_orphan_reco_type orphan_reco_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) return __ocfs2_wait_on_mount(osb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) return __ocfs2_wait_on_mount(osb, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * This replay_map is to track online/offline slots, so we could recover
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) * offline slots during recovery and mount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) enum ocfs2_replay_state {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) REPLAY_UNNEEDED = 0, /* Replay is not needed, so ignore this map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) REPLAY_NEEDED, /* Replay slots marked in rm_replay_slots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) REPLAY_DONE /* Replay was already queued */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) struct ocfs2_replay_map {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) unsigned int rm_slots;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) enum ocfs2_replay_state rm_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) unsigned char rm_replay_slots[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) if (!osb->replay_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) /* If we've already queued the replay, we don't have any more to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) if (osb->replay_map->rm_state == REPLAY_DONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) osb->replay_map->rm_state = state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) struct ocfs2_replay_map *replay_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) int i, node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /* If replay map is already set, we don't do it again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) if (osb->replay_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) (osb->max_slots * sizeof(char)), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) if (!replay_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) mlog_errno(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) replay_map->rm_slots = osb->max_slots;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) replay_map->rm_state = REPLAY_UNNEEDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) /* set rm_replay_slots for offline slot(s) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) for (i = 0; i < replay_map->rm_slots; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) replay_map->rm_replay_slots[i] = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) osb->replay_map = replay_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) enum ocfs2_orphan_reco_type orphan_reco_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) struct ocfs2_replay_map *replay_map = osb->replay_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) if (!replay_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) if (replay_map->rm_state != REPLAY_NEEDED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) for (i = 0; i < replay_map->rm_slots; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) if (replay_map->rm_replay_slots[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) ocfs2_queue_recovery_completion(osb->journal, i, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) NULL, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) orphan_reco_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) replay_map->rm_state = REPLAY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) struct ocfs2_replay_map *replay_map = osb->replay_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) if (!osb->replay_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) kfree(replay_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) osb->replay_map = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) int ocfs2_recovery_init(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) struct ocfs2_recovery_map *rm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) mutex_init(&osb->recovery_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) osb->disable_recovery = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) osb->recovery_thread_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) init_waitqueue_head(&osb->recovery_event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) osb->max_slots * sizeof(unsigned int),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) if (!rm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) mlog_errno(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) rm->rm_entries = (unsigned int *)((char *)rm +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) sizeof(struct ocfs2_recovery_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) osb->recovery_map = rm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) /* we can't grab the goofy sem lock from inside wait_event, so we use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) * memory barriers to make sure that we'll see the null task before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * being woken up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) return osb->recovery_thread_task != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) void ocfs2_recovery_exit(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) struct ocfs2_recovery_map *rm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) /* disable any new recovery threads and wait for any currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) * running ones to exit. Do this before setting the vol_state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) mutex_lock(&osb->recovery_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) osb->disable_recovery = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) mutex_unlock(&osb->recovery_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) /* At this point, we know that no more recovery threads can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) * launched, so wait for any recovery completion work to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) * complete. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) if (osb->ocfs2_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) flush_workqueue(osb->ocfs2_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * Now that recovery is shut down, and the osb is about to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * freed, the osb_lock is not taken here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) rm = osb->recovery_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) /* XXX: Should we bug if there are dirty entries? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) kfree(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) unsigned int node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) struct ocfs2_recovery_map *rm = osb->recovery_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) assert_spin_locked(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) for (i = 0; i < rm->rm_used; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) if (rm->rm_entries[i] == node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) /* Behaves like test-and-set. Returns the previous value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) unsigned int node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) struct ocfs2_recovery_map *rm = osb->recovery_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) if (__ocfs2_recovery_map_test(osb, node_num)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) /* XXX: Can this be exploited? Not from o2dlm... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) BUG_ON(rm->rm_used >= osb->max_slots);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) rm->rm_entries[rm->rm_used] = node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) rm->rm_used++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) unsigned int node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) struct ocfs2_recovery_map *rm = osb->recovery_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) for (i = 0; i < rm->rm_used; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) if (rm->rm_entries[i] == node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) if (i < rm->rm_used) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) /* XXX: be careful with the pointer math */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) (rm->rm_used - i - 1) * sizeof(unsigned int));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) rm->rm_used--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) static int ocfs2_commit_cache(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) unsigned int flushed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) struct ocfs2_journal *journal = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) journal = osb->journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) /* Flush all pending commits and checkpoint the journal. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) down_write(&journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) flushed = atomic_read(&journal->j_num_trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) trace_ocfs2_commit_cache_begin(flushed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) if (flushed == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) up_write(&journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) goto finally;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) jbd2_journal_lock_updates(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) status = jbd2_journal_flush(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) jbd2_journal_unlock_updates(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) up_write(&journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) goto finally;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) ocfs2_inc_trans_id(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) flushed = atomic_read(&journal->j_num_trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) atomic_set(&journal->j_num_trans, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) up_write(&journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) ocfs2_wake_downconvert_thread(osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) wake_up(&journal->j_checkpointed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) finally:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) journal_t *journal = osb->journal->j_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) handle_t *handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) BUG_ON(!osb || !osb->journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) if (ocfs2_is_hard_readonly(osb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) return ERR_PTR(-EROFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) BUG_ON(max_buffs <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) /* Nested transaction? Just return the handle... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) if (journal_current_handle())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) return jbd2_journal_start(journal, max_buffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) sb_start_intwrite(osb->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) down_read(&osb->journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) handle = jbd2_journal_start(journal, max_buffs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) if (IS_ERR(handle)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) up_read(&osb->journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) sb_end_intwrite(osb->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) mlog_errno(PTR_ERR(handle));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) if (is_journal_aborted(journal)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) ocfs2_abort(osb->sb, "Detected aborted journal\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) handle = ERR_PTR(-EROFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) if (!ocfs2_mount_local(osb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) atomic_inc(&(osb->journal->j_num_trans));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) return handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) int ocfs2_commit_trans(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) handle_t *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) int ret, nested;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) struct ocfs2_journal *journal = osb->journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) BUG_ON(!handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) nested = handle->h_ref > 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) ret = jbd2_journal_stop(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) if (!nested) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) up_read(&journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) sb_end_intwrite(osb->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) * 'nblocks' is what you want to add to the current transaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) * This might call jbd2_journal_restart() which will commit dirty buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) * and then restart the transaction. Before calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) * ocfs2_extend_trans(), any changed blocks should have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) * dirtied. After calling it, all blocks which need to be changed must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) * go through another set of journal_access/journal_dirty calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) * WARNING: This will not release any semaphores or disk locks taken
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) * during the transaction, so make sure they were taken *before*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) * start_trans or we'll have ordering deadlocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) * WARNING2: Note that we do *not* drop j_trans_barrier here. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) * good because transaction ids haven't yet been recorded on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) * cluster locks associated with this handle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) int ocfs2_extend_trans(handle_t *handle, int nblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) int status, old_nblocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) BUG_ON(!handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) BUG_ON(nblocks < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) if (!nblocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) old_nblocks = jbd2_handle_buffer_credits(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) trace_ocfs2_extend_trans(old_nblocks, nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) #ifdef CONFIG_OCFS2_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) status = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) status = jbd2_journal_extend(handle, nblocks, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) if (status > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) status = jbd2_journal_restart(handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) old_nblocks + nblocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) * If that fails, restart the transaction & regain write access for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) * buffer head which is used for metadata modifications.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) * Taken from Ext4: extend_or_restart_transaction()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) int status, old_nblks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) BUG_ON(!handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) old_nblks = jbd2_handle_buffer_credits(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) if (old_nblks < thresh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) if (status > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) struct ocfs2_triggers {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) struct jbd2_buffer_trigger_type ot_triggers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) int ot_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) return container_of(triggers, struct ocfs2_triggers, ot_triggers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) void *data, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) * We aren't guaranteed to have the superblock here, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) * must unconditionally compute the ecc data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) * __ocfs2_journal_access() will only set the triggers if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * metaecc is enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) ocfs2_block_check_compute(data, size, data + ot->ot_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * Quota blocks have their own trigger because the struct ocfs2_block_check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) * offset depends on the blocksize.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) void *data, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) struct ocfs2_disk_dqtrailer *dqt =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) ocfs2_block_dqtrailer(size, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) * We aren't guaranteed to have the superblock here, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) * must unconditionally compute the ecc data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) * __ocfs2_journal_access() will only set the triggers if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * metaecc is enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) ocfs2_block_check_compute(data, size, &dqt->dq_check);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) * Directory blocks also have their own trigger because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) * struct ocfs2_block_check offset depends on the blocksize.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) void *data, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) struct ocfs2_dir_block_trailer *trailer =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) ocfs2_dir_trailer_from_size(size, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) * We aren't guaranteed to have the superblock here, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) * must unconditionally compute the ecc data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) * __ocfs2_journal_access() will only set the triggers if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) * metaecc is enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) ocfs2_block_check_compute(data, size, &trailer->db_check);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) mlog(ML_ERROR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) "bh->b_blocknr = %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) (unsigned long)bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) (unsigned long long)bh->b_blocknr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) ocfs2_error(bh->b_bdev->bd_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) "JBD2 has aborted our journal, ocfs2 cannot continue\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) static struct ocfs2_triggers di_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) .ot_offset = offsetof(struct ocfs2_dinode, i_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) static struct ocfs2_triggers eb_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) static struct ocfs2_triggers rb_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) static struct ocfs2_triggers gd_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) static struct ocfs2_triggers db_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) .t_frozen = ocfs2_db_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) static struct ocfs2_triggers xb_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) static struct ocfs2_triggers dq_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) .t_frozen = ocfs2_dq_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) static struct ocfs2_triggers dr_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) static struct ocfs2_triggers dl_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) .ot_triggers = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) .t_frozen = ocfs2_frozen_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) .t_abort = ocfs2_abort_trigger,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) static int __ocfs2_journal_access(handle_t *handle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) struct buffer_head *bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) struct ocfs2_triggers *triggers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) struct ocfs2_super *osb =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) BUG_ON(!ci || !ci->ci_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) BUG_ON(!handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) BUG_ON(!bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) trace_ocfs2_journal_access(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) (unsigned long long)ocfs2_metadata_cache_owner(ci),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) (unsigned long long)bh->b_blocknr, type, bh->b_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) /* we can safely remove this assertion after testing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) if (!buffer_uptodate(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) (unsigned long long)bh->b_blocknr, bh->b_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) lock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) * A previous transaction with a couple of buffer heads fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) * to checkpoint, so all the bhs are marked as BH_Write_EIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) * For current transaction, the bh is just among those error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) * bhs which previous transaction handle. We can't just clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) * its BH_Write_EIO and reuse directly, since other bhs are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) * not written to disk yet and that will cause metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) * inconsistency. So we should set fs read-only to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) * further damage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) unlock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) return ocfs2_error(osb->sb, "A previous attempt to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) "write this buffer head failed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) unlock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) /* Set the current transaction information on the ci so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) * that the locking code knows whether it can drop it's locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) * on this ci or not. We're protected from the commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) * thread updating the current transaction id until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) * ocfs2_commit_trans() because ocfs2_start_trans() took
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) * j_trans_barrier for us. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) ocfs2_set_ci_lock_trans(osb->journal, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) ocfs2_metadata_cache_io_lock(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) case OCFS2_JOURNAL_ACCESS_CREATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) case OCFS2_JOURNAL_ACCESS_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) status = jbd2_journal_get_write_access(handle, bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) case OCFS2_JOURNAL_ACCESS_UNDO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) status = jbd2_journal_get_undo_access(handle, bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) mlog(ML_ERROR, "Unknown access type!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) if (!status && ocfs2_meta_ecc(osb) && triggers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) ocfs2_metadata_cache_io_unlock(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) status, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) struct buffer_head *bh, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) return __ocfs2_journal_access(handle, ci, bh, NULL, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) status = jbd2_journal_dirty_metadata(handle, bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) if (status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) if (!is_handle_aborted(handle)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) journal_t *journal = handle->h_transaction->t_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) struct super_block *sb = bh->b_bdev->bd_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) "Aborting transaction and journal.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) handle->h_err = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) jbd2_journal_abort_handle(handle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) jbd2_journal_abort(journal, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) ocfs2_abort(sb, "Journal already aborted.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) void ocfs2_set_journal_params(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) journal_t *journal = osb->journal->j_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) if (osb->osb_commit_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) commit_interval = osb->osb_commit_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) write_lock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) journal->j_commit_interval = commit_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) journal->j_flags |= JBD2_BARRIER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) journal->j_flags &= ~JBD2_BARRIER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) write_unlock(&journal->j_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) int status = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) struct inode *inode = NULL; /* the journal inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) journal_t *j_journal = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) struct ocfs2_dinode *di = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) struct buffer_head *bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) struct ocfs2_super *osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) int inode_lock = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) BUG_ON(!journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) osb = journal->j_osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) /* already have the inode for our journal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) osb->slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) if (inode == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) status = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) if (is_bad_inode(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) mlog(ML_ERROR, "access error (bad inode)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) status = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) SET_INODE_JOURNAL(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) OCFS2_I(inode)->ip_open_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) /* Skip recovery waits here - journal inode metadata never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) * changes in a live cluster so it can be considered an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) * exception to the rule. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (status != -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) mlog(ML_ERROR, "Could not get lock on journal!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) inode_lock = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) di = (struct ocfs2_dinode *)bh->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) i_size_read(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) trace_ocfs2_journal_init(i_size_read(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) (unsigned long long)inode->i_blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) OCFS2_I(inode)->ip_clusters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) /* call the kernels journal init function now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) j_journal = jbd2_journal_init_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (j_journal == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) mlog(ML_ERROR, "Linux journal layer error\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) OCFS2_JOURNAL_DIRTY_FL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) journal->j_journal = j_journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) journal->j_journal->j_submit_inode_data_buffers =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) jbd2_journal_submit_inode_data_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) journal->j_journal->j_finish_inode_data_buffers =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) jbd2_journal_finish_inode_data_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) journal->j_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) journal->j_bh = bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) ocfs2_set_journal_params(osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) journal->j_state = OCFS2_JOURNAL_LOADED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) if (inode_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) ocfs2_inode_unlock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) OCFS2_I(inode)->ip_open_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) int dirty, int replayed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) unsigned int flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) struct ocfs2_journal *journal = osb->journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) struct buffer_head *bh = journal->j_bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) struct ocfs2_dinode *fe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) fe = (struct ocfs2_dinode *)bh->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) /* The journal bh on the osb always comes from ocfs2_journal_init()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) * and was validated there inside ocfs2_inode_lock_full(). It's a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) * code bug if we mess it up. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) flags = le32_to_cpu(fe->id1.journal1.ij_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) flags |= OCFS2_JOURNAL_DIRTY_FL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) flags &= ~OCFS2_JOURNAL_DIRTY_FL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) fe->id1.journal1.ij_flags = cpu_to_le32(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (replayed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) ocfs2_bump_recovery_generation(fe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * If the journal has been kmalloc'd it needs to be freed after this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) * call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) void ocfs2_journal_shutdown(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) struct ocfs2_journal *journal = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) int num_running_trans = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) BUG_ON(!osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) journal = osb->journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (!journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) inode = journal->j_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (journal->j_state != OCFS2_JOURNAL_LOADED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) /* need to inc inode use count - jbd2_journal_destroy will iput. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (!igrab(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) num_running_trans = atomic_read(&(osb->journal->j_num_trans));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) trace_ocfs2_journal_shutdown(num_running_trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) /* Do a commit_cache here. It will flush our journal, *and*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) * release any locks that are still held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) * set the SHUTDOWN flag and release the trans lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) * the commit thread will take the trans lock for us below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) /* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) * drop the trans_lock (which we want to hold until we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * completely destroy the journal. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (osb->commit_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) /* Wait for the commit thread */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) trace_ocfs2_journal_shutdown_wait(osb->commit_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) kthread_stop(osb->commit_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) osb->commit_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) if (ocfs2_mount_local(osb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) jbd2_journal_lock_updates(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) status = jbd2_journal_flush(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) jbd2_journal_unlock_updates(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) /* Shutdown the kernel journal system */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) if (!jbd2_journal_destroy(journal->j_journal) && !status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) * Do not toggle if flush was unsuccessful otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * will leave dirty metadata in a "clean" journal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) status = ocfs2_journal_toggle_dirty(osb, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) journal->j_journal = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) OCFS2_I(inode)->ip_open_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) /* unlock our journal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) ocfs2_inode_unlock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) brelse(journal->j_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) journal->j_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) journal->j_state = OCFS2_JOURNAL_FREE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) // up_write(&journal->j_trans_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) static void ocfs2_clear_journal_error(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) journal_t *journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) int slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) int olderr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) olderr = jbd2_journal_errno(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) if (olderr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) mlog(ML_ERROR, "File system error %d recorded in "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) "journal %u.\n", olderr, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) mlog(ML_ERROR, "File system on device %s needs checking.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) sb->s_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) jbd2_journal_ack_err(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) jbd2_journal_clear_err(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) struct ocfs2_super *osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) BUG_ON(!journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) osb = journal->j_osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) status = jbd2_journal_load(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) mlog(ML_ERROR, "Failed to load journal!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) if (replayed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) jbd2_journal_lock_updates(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) status = jbd2_journal_flush(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) jbd2_journal_unlock_updates(journal->j_journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) /* Launch the commit thread */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (!local) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) "ocfs2cmt-%s", osb->uuid_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) if (IS_ERR(osb->commit_task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) status = PTR_ERR(osb->commit_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) osb->commit_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) "error=%d", status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) osb->commit_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) /* 'full' flag tells us whether we clear out all blocks or if we just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * mark the journal clean */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) BUG_ON(!journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) status = jbd2_journal_wipe(journal->j_journal, full);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) static int ocfs2_recovery_completed(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) int empty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) struct ocfs2_recovery_map *rm = osb->recovery_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) empty = (rm->rm_used == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) return empty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * JBD Might read a cached version of another nodes journal file. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * don't want this as this file changes often and we get no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) * notification on those changes. The only way to be sure that we've
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) * got the most up to date version of those blocks then is to force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) * read them off disk. Just searching through the buffer cache won't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * work as there may be pages backing this file which are still marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * up to date. We know things can't change on this file underneath us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * as we have the lock by now :)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) static int ocfs2_force_read_journal(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) u64 v_blkno, p_blkno, p_blocks, num_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) struct buffer_head *bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) v_blkno = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) while (v_blkno < num_blocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) status = ocfs2_extent_map_get_blocks(inode, v_blkno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) &p_blkno, &p_blocks, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) for (i = 0; i < p_blocks; i++, p_blkno++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) bh = __find_get_block(osb->sb->s_bdev, p_blkno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) osb->sb->s_blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) /* block not cached. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) if (!bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) /* We are reading journal data which should not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * be put in the uptodate cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) v_blkno += p_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) struct ocfs2_la_recovery_item {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) struct list_head lri_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) int lri_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) struct ocfs2_dinode *lri_la_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) struct ocfs2_dinode *lri_tl_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) struct ocfs2_quota_recovery *lri_qrec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) enum ocfs2_orphan_reco_type lri_orphan_reco_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) /* Does the second half of the recovery process. By this point, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * node is marked clean and can actually be considered recovered,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * hence it's no longer in the recovery map, but there's still some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * cleanup we can do which shouldn't happen within the recovery thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * as locking in that context becomes very difficult if we are to take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) * recovering nodes into account.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) * NOTE: This function can and will sleep on recovery of other nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * during cluster locking, just like any other ocfs2 process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) void ocfs2_complete_recovery(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) struct ocfs2_journal *journal =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) container_of(work, struct ocfs2_journal, j_recovery_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) struct ocfs2_super *osb = journal->j_osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) struct ocfs2_dinode *la_dinode, *tl_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) struct ocfs2_la_recovery_item *item, *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) struct ocfs2_quota_recovery *qrec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) enum ocfs2_orphan_reco_type orphan_reco_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) LIST_HEAD(tmp_la_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) trace_ocfs2_complete_recovery(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) spin_lock(&journal->j_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) spin_unlock(&journal->j_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) list_del_init(&item->lri_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) ocfs2_wait_on_quotas(osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) la_dinode = item->lri_la_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) tl_dinode = item->lri_tl_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) qrec = item->lri_qrec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) orphan_reco_type = item->lri_orphan_reco_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) trace_ocfs2_complete_recovery_slot(item->lri_slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) qrec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) if (la_dinode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) ret = ocfs2_complete_local_alloc_recovery(osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) la_dinode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) kfree(la_dinode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (tl_dinode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) ret = ocfs2_complete_truncate_log_recovery(osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) tl_dinode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) kfree(tl_dinode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) ret = ocfs2_recover_orphans(osb, item->lri_slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) orphan_reco_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (qrec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) ret = ocfs2_finish_quota_recovery(osb, qrec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) item->lri_slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) /* Recovery info is already freed now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) trace_ocfs2_complete_recovery_end(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) /* NOTE: This function always eats your references to la_dinode and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) * tl_dinode, either manually on error, or by passing them to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) * ocfs2_complete_recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) int slot_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) struct ocfs2_dinode *la_dinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) struct ocfs2_dinode *tl_dinode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) struct ocfs2_quota_recovery *qrec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) enum ocfs2_orphan_reco_type orphan_reco_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) struct ocfs2_la_recovery_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) if (!item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) /* Though we wish to avoid it, we are in fact safe in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * skipping local alloc cleanup as fsck.ocfs2 is more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * than capable of reclaiming unused space. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) kfree(la_dinode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) kfree(tl_dinode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) if (qrec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) ocfs2_free_quota_recovery(qrec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) mlog_errno(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) INIT_LIST_HEAD(&item->lri_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) item->lri_la_dinode = la_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) item->lri_slot = slot_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) item->lri_tl_dinode = tl_dinode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) item->lri_qrec = qrec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) item->lri_orphan_reco_type = orphan_reco_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) spin_lock(&journal->j_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) list_add_tail(&item->lri_list, &journal->j_la_cleanups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) spin_unlock(&journal->j_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) /* Called by the mount code to queue recovery the last part of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) * recovery for it's own and offline slot(s). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) struct ocfs2_journal *journal = osb->journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (ocfs2_is_hard_readonly(osb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /* No need to queue up our truncate_log as regular cleanup will catch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * that */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) ocfs2_queue_recovery_completion(journal, osb->slot_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) osb->local_alloc_copy, NULL, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) ORPHAN_NEED_TRUNCATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) ocfs2_schedule_truncate_log_flush(osb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) osb->local_alloc_copy = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) /* queue to recover orphan slots for all offline slots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) ocfs2_free_replay_slots(osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) if (osb->quota_rec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) ocfs2_queue_recovery_completion(osb->journal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) osb->slot_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) osb->quota_rec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) ORPHAN_NEED_TRUNCATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) osb->quota_rec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368)
/*
 * Body of the recovery kthread: replay the journals of the dead nodes
 * recorded in osb->recovery_map, queue follow-up (orphan/quota)
 * completion work, and exit once the map has been drained.
 */
static int __ocfs2_recovery_thread(void *arg)
{
	int status, node_num, slot_num;
	struct ocfs2_super *osb = arg;
	struct ocfs2_recovery_map *rm = osb->recovery_map;
	int *rm_quota = NULL;
	int rm_quota_used = 0, i;
	struct ocfs2_quota_recovery *qrec;

	/* Whether quota is supported on this volume (user or group
	 * quota ro-compat feature bit set). */
	int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
			OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
		|| OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
			OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);

	status = ocfs2_wait_on_mount(osb);
	if (status < 0) {
		goto bail;
	}

	if (quota_enabled) {
		/* One entry per slot whose quotas still need recovery;
		 * filled (duplicate-free) in the loop below. */
		rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
		if (!rm_quota) {
			status = -ENOMEM;
			goto bail;
		}
	}
restart:
	status = ocfs2_super_lock(osb, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_compute_replay_slots(osb);
	if (status < 0)
		mlog_errno(status);

	/* queue recovery for our own slot */
	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
					NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);

	spin_lock(&osb->osb_lock);
	while (rm->rm_used) {
		/* It's always safe to remove entry zero, as we won't
		 * clear it until ocfs2_recover_node() has succeeded. */
		node_num = rm->rm_entries[0];
		spin_unlock(&osb->osb_lock);
		slot_num = ocfs2_node_num_to_slot(osb, node_num);
		trace_ocfs2_recovery_thread_node(node_num, slot_num);
		if (slot_num == -ENOENT) {
			/* Node no longer owns a slot; nothing to replay,
			 * just drop it from the recovery map. */
			status = 0;
			goto skip_recovery;
		}

		/* It is a bit subtle with quota recovery. We cannot do it
		 * immediately because we have to obtain cluster locks from
		 * quota files and we also don't want to just skip it because
		 * then quota usage would be out of sync until some node takes
		 * the slot. So we remember which nodes need quota recovery
		 * and when everything else is done, we recover quotas. */
		if (quota_enabled) {
			/* Linear scan keeps rm_quota duplicate-free. */
			for (i = 0; i < rm_quota_used
					&& rm_quota[i] != slot_num; i++)
				;

			if (i == rm_quota_used)
				rm_quota[rm_quota_used++] = slot_num;
		}

		status = ocfs2_recover_node(osb, node_num, slot_num);
skip_recovery:
		if (!status) {
			ocfs2_recovery_map_clear(osb, node_num);
		} else {
			mlog(ML_ERROR,
			     "Error %d recovering node %d on device (%u,%u)!\n",
			     status, node_num,
			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
			mlog(ML_ERROR, "Volume requires unmount.\n");
		}

		spin_lock(&osb->osb_lock);
	}
	spin_unlock(&osb->osb_lock);
	trace_ocfs2_recovery_thread_end(status);

	/* Refresh all journal recovery generations from disk */
	status = ocfs2_check_journals_nolocks(osb);
	/* -EROFS only says the volume is read-only; not a failure here. */
	status = (status == -EROFS) ? 0 : status;
	if (status < 0)
		mlog_errno(status);

	/* Now it is right time to recover quotas... We have to do this under
	 * superblock lock so that no one can start using the slot (and crash)
	 * before we recover it */
	if (quota_enabled) {
		for (i = 0; i < rm_quota_used; i++) {
			qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
			if (IS_ERR(qrec)) {
				status = PTR_ERR(qrec);
				mlog_errno(status);
				continue;
			}
			ocfs2_queue_recovery_completion(osb->journal,
							rm_quota[i],
							NULL, NULL, qrec,
							ORPHAN_NEED_TRUNCATE);
		}
	}

	ocfs2_super_unlock(osb, 1);

	/* queue recovery for offline slots */
	ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);

bail:
	mutex_lock(&osb->recovery_lock);
	/* More work may have been added to the recovery map while we
	 * ran; loop again until recovery is really complete. */
	if (!status && !ocfs2_recovery_completed(osb)) {
		mutex_unlock(&osb->recovery_lock);
		goto restart;
	}

	ocfs2_free_replay_slots(osb);
	osb->recovery_thread_task = NULL;
	mb(); /* sync with ocfs2_recovery_thread_running */
	wake_up(&osb->recovery_event);

	mutex_unlock(&osb->recovery_lock);

	if (quota_enabled)
		kfree(rm_quota);

	/* no one is calling kthread_stop() for us so the kthread() api
	 * requires that we call do_exit(). And it isn't exported, but
	 * complete_and_exit() seems to be a minimal wrapper around it. */
	complete_and_exit(NULL, status);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) mutex_lock(&osb->recovery_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) trace_ocfs2_recovery_thread(node_num, osb->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) osb->disable_recovery, osb->recovery_thread_task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) osb->disable_recovery ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) -1 : ocfs2_recovery_map_set(osb, node_num));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) if (osb->disable_recovery)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) if (osb->recovery_thread_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) "ocfs2rec-%s", osb->uuid_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) if (IS_ERR(osb->recovery_thread_task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) osb->recovery_thread_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) mutex_unlock(&osb->recovery_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) wake_up(&osb->recovery_event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) int slot_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) struct buffer_head **bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) struct inode **ret_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) int status = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) BUG_ON(slot_num >= osb->max_slots);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) if (!inode || is_bad_inode(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) SET_INODE_JOURNAL(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) if (status || !ret_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) *ret_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570)
/* Does the actual journal replay and marks the journal inode as
 * clean. Will only replay if the journal inode is marked dirty. */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
				int node_num,
				int slot_num)
{
	int status;
	int got_lock = 0;
	unsigned int flags;
	struct inode *inode = NULL;
	struct ocfs2_dinode *fe;
	journal_t *journal = NULL;
	struct buffer_head *bh = NULL;
	u32 slot_reco_gen;

	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
	if (status) {
		mlog_errno(status);
		goto done;
	}

	/* First read is done without the cluster lock ("dirty read"),
	 * only to sample the recovery generation. */
	fe = (struct ocfs2_dinode *)bh->b_data;
	slot_reco_gen = ocfs2_get_recovery_generation(fe);
	brelse(bh);
	bh = NULL;

	/*
	 * As the fs recovery is asynchronous, there is a small chance that
	 * another node mounted (and recovered) the slot before the recovery
	 * thread could get the lock. To handle that, we dirty read the journal
	 * inode for that slot to get the recovery generation. If it is
	 * different than what we expected, the slot has been recovered.
	 * If not, it needs recovery.
	 */
	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
		trace_ocfs2_replay_journal_recovered(slot_num,
		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		/* -EBUSY tells the caller the slot was recovered by
		 * someone else; it is treated as success there. */
		status = -EBUSY;
		goto done;
	}

	/* Continue with recovery as the journal has not yet been recovered */

	/* Taking the lock re-reads the inode block into bh. */
	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
	if (status < 0) {
		trace_ocfs2_replay_journal_lock_err(status);
		if (status != -ERESTARTSYS)
			mlog(ML_ERROR, "Could not lock journal!\n");
		goto done;
	}
	got_lock = 1;

	fe = (struct ocfs2_dinode *) bh->b_data;

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	slot_reco_gen = ocfs2_get_recovery_generation(fe);

	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
		/* Journal is clean: nothing to replay. */
		trace_ocfs2_replay_journal_skip(node_num);
		/* Refresh recovery generation for the slot */
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		goto done;
	}

	/* we need to run complete recovery for offline orphan slots */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);

	printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
	       "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
	       MINOR(osb->sb->s_dev));

	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);

	/* Pull the whole journal into the page cache before jbd2
	 * touches it. */
	status = ocfs2_force_read_journal(inode);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	journal = jbd2_journal_init_inode(inode);
	if (journal == NULL) {
		mlog(ML_ERROR, "Linux journal layer error\n");
		status = -EIO;
		goto done;
	}

	/* jbd2_journal_load() performs the actual replay. */
	status = jbd2_journal_load(journal);
	if (status < 0) {
		mlog_errno(status);
		/* NOTE(review): extra igrab appears to balance a reference
		 * jbd2_journal_destroy() drops on the journal inode —
		 * confirm against the jbd2 API. */
		if (!igrab(inode))
			BUG();
		jbd2_journal_destroy(journal);
		goto done;
	}

	ocfs2_clear_journal_error(osb->sb, journal, slot_num);

	/* wipe the journal */
	jbd2_journal_lock_updates(journal);
	status = jbd2_journal_flush(journal);
	jbd2_journal_unlock_updates(journal);
	if (status < 0)
		mlog_errno(status);

	/* This will mark the node clean */
	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	/* Increment recovery generation to indicate successful recovery */
	ocfs2_bump_recovery_generation(fe);
	osb->slot_recovery_generations[slot_num] =
					ocfs2_get_recovery_generation(fe);

	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
	if (status < 0)
		mlog_errno(status);

	/* see NOTE(review) above on the igrab/destroy pairing */
	if (!igrab(inode))
		BUG();

	jbd2_journal_destroy(journal);

	printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
	       "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
	       MINOR(osb->sb->s_dev));
done:
	/* drop the lock on this node's journal */
	if (got_lock)
		ocfs2_inode_unlock(inode, 1);

	iput(inode);
	brelse(bh);

	return status;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) * Do the most important parts of node recovery:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) * - Replay it's journal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) * - Stamp a clean local allocator file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) * - Stamp a clean truncate log
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) * - Mark the node clean
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) * If this function completes without error, a node in OCFS2 can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) * said to have been safely recovered. As a result, failure during the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) * second part of a nodes recovery process (local alloc recovery) is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * far less concerning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) static int ocfs2_recover_node(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) int node_num, int slot_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) struct ocfs2_dinode *la_copy = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) struct ocfs2_dinode *tl_copy = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) /* Should not ever be called to recover ourselves -- in that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) * case we should've called ocfs2_journal_load instead. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) BUG_ON(osb->node_num == node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) status = ocfs2_replay_journal(osb, node_num, slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) if (status == -EBUSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) trace_ocfs2_recover_node_skip(slot_num, node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) /* Stamp a clean local alloc file AFTER recovering the journal... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) /* An error from begin_truncate_log_recovery is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) * serious enough to warrant halting the rest of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) * recovery. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) /* Likewise, this would be a strange but ultimately not so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) * harmful place to get an error... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) status = ocfs2_clear_slot(osb, slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) /* This will kfree the memory pointed to by la_copy and tl_copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) /* Test node liveness by trylocking his journal. If we get the lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) * still alive (we couldn't get the lock) and < 0 on error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) static int ocfs2_trylock_journal(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) int slot_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) int status, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) slot_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) if (inode == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) mlog(ML_ERROR, "access error\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) status = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) if (is_bad_inode(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) mlog(ML_ERROR, "access error (bad inode)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) status = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) SET_INODE_JOURNAL(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) if (status != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) ocfs2_inode_unlock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) /* Call this underneath ocfs2_super_lock. It also assumes that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * slot info struct has been updated from disk. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) unsigned int node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) int status, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) u32 gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) struct buffer_head *bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) struct ocfs2_dinode *di;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) /* This is called with the super block cluster lock, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * know that the slot map can't change underneath us. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) for (i = 0; i < osb->max_slots; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) /* Read journal inode to get the recovery generation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) if (status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) di = (struct ocfs2_dinode *)bh->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) gen = ocfs2_get_recovery_generation(di);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) brelse(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) osb->slot_recovery_generations[i] = gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) trace_ocfs2_mark_dead_nodes(i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) osb->slot_recovery_generations[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) if (i == osb->slot_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) if (status == -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) if (__ocfs2_recovery_map_test(osb, node_num)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) /* Ok, we have a slot occupied by another node which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) * is not in the recovery map. We trylock his journal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) * file here to test if he's alive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) status = ocfs2_trylock_journal(osb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) if (!status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) /* Since we're called from mount, we know that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * the recovery thread can't race us on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) * setting / checking the recovery bits. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) ocfs2_recovery_thread(osb, node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) } else if ((status < 0) && (status != -EAGAIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) * randomness to the timeout to minimize multple nodes firing the timer at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) * same time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) static inline unsigned long ocfs2_orphan_scan_timeout(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) unsigned long time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) get_random_bytes(&time, sizeof(time));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) return msecs_to_jiffies(time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) * is done to catch any orphans that are left over in orphan directories.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) * It scans all slots, even ones that are in use. It does so to handle the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) * case described below:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) * Node 1 has an inode it was using. The dentry went away due to memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * pressure. Node 1 closes the inode, but it's on the free list. The node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) * has the open lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) * Node 2 unlinks the inode. It grabs the dentry lock to notify others,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * but node 1 has no dentry and doesn't get the message. It trylocks the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) * open lock, sees that another node has a PR, and does nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) * Later node 2 runs its orphan dir. It igets the inode, trylocks the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) * open lock, sees the PR still, and does nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * Basically, we have to trigger an orphan iput on node 1. The only way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) * for this to happen is if node 1 runs node 2's orphan dir.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) * seconds. It gets an EX lock on os_lockres and checks sequence number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) * stored in LVB. If the sequence number has changed, it means some other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) * node has done the scan. This node skips the scan and tracks the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) * sequence number. If the sequence number didn't change, it means a scan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) * hasn't happened. The node queues a scan and increments the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) * sequence number in the LVB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) struct ocfs2_orphan_scan *os;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) int status, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) u32 seqno = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) os = &osb->osb_orphan_scan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) atomic_read(&os->os_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) status = ocfs2_orphan_scan_lock(osb, &seqno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) if (status != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) /* Do no queue the tasks if the volume is being umounted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) if (os->os_seqno != seqno) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) os->os_seqno = seqno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) for (i = 0; i < osb->max_slots; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) NULL, ORPHAN_NO_NEED_TRUNCATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) * We queued a recovery on orphan slots, increment the sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) * number and update LVB so other node will skip the scan for a while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) seqno++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) os->os_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) os->os_scantime = ktime_get_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) ocfs2_orphan_scan_unlock(osb, seqno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) atomic_read(&os->os_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) /* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) static void ocfs2_orphan_scan_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) struct ocfs2_orphan_scan *os;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) struct ocfs2_super *osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) os = container_of(work, struct ocfs2_orphan_scan,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) os_orphan_scan_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) osb = os->os_osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) mutex_lock(&os->os_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) ocfs2_queue_orphan_scan(osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) ocfs2_orphan_scan_timeout());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) mutex_unlock(&os->os_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) struct ocfs2_orphan_scan *os;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) os = &osb->osb_orphan_scan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) mutex_lock(&os->os_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) cancel_delayed_work(&os->os_orphan_scan_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) mutex_unlock(&os->os_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) struct ocfs2_orphan_scan *os;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) os = &osb->osb_orphan_scan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) os->os_osb = osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) os->os_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) os->os_seqno = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) mutex_init(&os->os_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) struct ocfs2_orphan_scan *os;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) os = &osb->osb_orphan_scan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) os->os_scantime = ktime_get_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) ocfs2_orphan_scan_timeout());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) struct ocfs2_orphan_filldir_priv {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) struct dir_context ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) struct inode *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) struct ocfs2_super *osb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) enum ocfs2_orphan_reco_type orphan_reco_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) int name_len, loff_t pos, u64 ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) unsigned type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) struct ocfs2_orphan_filldir_priv *p =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) struct inode *iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) if (name_len == 1 && !strncmp(".", name, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) if (name_len == 2 && !strncmp("..", name, 2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) /* do not include dio entry in case of orphan scan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) OCFS2_DIO_ORPHAN_PREFIX_LEN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) /* Skip bad inodes so that recovery can continue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) iter = ocfs2_iget(p->osb, ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) if (IS_ERR(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) OCFS2_DIO_ORPHAN_PREFIX_LEN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) /* Skip inodes which are already added to recover list, since dio may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) * happen concurrently with unlink/rename */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) if (OCFS2_I(iter)->ip_next_orphan) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) iput(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) /* No locking is required for the next_orphan queue as there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) * is only ever a single process doing orphan recovery. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) OCFS2_I(iter)->ip_next_orphan = p->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) p->head = iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) static int ocfs2_queue_orphans(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) int slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) struct inode **head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) enum ocfs2_orphan_reco_type orphan_reco_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) struct inode *orphan_dir_inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) struct ocfs2_orphan_filldir_priv priv = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) .ctx.actor = ocfs2_orphan_filldir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) .osb = osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) .head = *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) .orphan_reco_type = orphan_reco_type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) orphan_dir_inode = ocfs2_get_system_file_inode(osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) ORPHAN_DIR_SYSTEM_INODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) if (!orphan_dir_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) status = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) inode_lock(orphan_dir_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) if (status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) goto out_cluster;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) *head = priv.head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) out_cluster:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) ocfs2_inode_unlock(orphan_dir_inode, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) inode_unlock(orphan_dir_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) iput(orphan_dir_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) int slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) ret = !osb->osb_orphan_wipes[slot];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) int slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) /* Mark ourselves such that new processes in delete_inode()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) * know to quit early. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) while (osb->osb_orphan_wipes[slot]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) /* If any processes are already in the middle of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) * orphan wipe on this dir, then we need to wait for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) * them. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) wait_event_interruptible(osb->osb_wipe_event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) ocfs2_orphan_recovery_can_continue(osb, slot));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) spin_lock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) spin_unlock(&osb->osb_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) int slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * Orphan recovery. Each mounted node has it's own orphan dir which we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) * must run during recovery. Our strategy here is to build a list of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) * the inodes in the orphan dir and iget/iput them. The VFS does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) * (most) of the rest of the work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) * Orphan recovery can happen at any time, not just mount so we have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) * couple of extra considerations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) * - We grab as many inodes as we can under the orphan dir lock -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) * doing iget() outside the orphan dir risks getting a reference on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) * an invalid inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) * - We must be sure not to deadlock with other processes on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) * system wanting to run delete_inode(). This can happen when they go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * to lock the orphan dir and the orphan recovery process attempts to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * iget() inside the orphan dir lock. This can be avoided by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) * advertising our state to ocfs2_delete_inode().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) static int ocfs2_recover_orphans(struct ocfs2_super *osb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) int slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) enum ocfs2_orphan_reco_type orphan_reco_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) struct inode *iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) struct ocfs2_inode_info *oi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) struct buffer_head *di_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) struct ocfs2_dinode *di = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) trace_ocfs2_recover_orphans(slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) ocfs2_mark_recovering_orphan_dir(osb, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) ocfs2_clear_recovering_orphan_dir(osb, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) /* Error here should be noted, but we want to continue with as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) * many queued inodes as we've got. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) while (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) oi = OCFS2_I(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) trace_ocfs2_recover_orphans_iput(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) (unsigned long long)oi->ip_blkno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) iter = oi->ip_next_orphan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) oi->ip_next_orphan = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) ret = ocfs2_rw_lock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) goto unlock_mutex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) * We need to take and drop the inode lock to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * force read inode from disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) ret = ocfs2_inode_lock(inode, &di_bh, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) goto unlock_rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) di = (struct ocfs2_dinode *)di_bh->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) ret = ocfs2_truncate_file(inode, di_bh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) i_size_read(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) if (ret != -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) goto unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) ret = ocfs2_del_inode_from_orphan(osb, inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) di_bh, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) unlock_inode:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) ocfs2_inode_unlock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) brelse(di_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) di_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) unlock_rw:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) ocfs2_rw_unlock(inode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) unlock_mutex:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) /* clear dio flag in ocfs2_inode_info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) spin_lock(&oi->ip_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) /* Set the proper information to get us going into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) * ocfs2_delete_inode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) spin_unlock(&oi->ip_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) inode = iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) /* This check is good because ocfs2 will wait on our recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) * thread before changing it to something other than MOUNTED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) * or DISABLED. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) wait_event(osb->osb_mount_event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) atomic_read(&osb->vol_state) == VOLUME_DISABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) /* If there's an error on mount, then we may never get to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) * MOUNTED flag, but this is set right before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) * dismount_volume() so we can trust it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) mlog(0, "mount error, exiting!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) static int ocfs2_commit_thread(void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) struct ocfs2_super *osb = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) struct ocfs2_journal *journal = osb->journal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) /* we can trust j_num_trans here because _should_stop() is only set in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) * shutdown and nobody other than ourselves should be able to start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) * transactions. committing on shutdown might take a few iterations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) * as final transactions put deleted inodes on the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) while (!(kthread_should_stop() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) atomic_read(&journal->j_num_trans) == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) wait_event_interruptible(osb->checkpoint_event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) atomic_read(&journal->j_num_trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) || kthread_should_stop());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) status = ocfs2_commit_cache(osb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) static unsigned long abort_warn_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) /* Warn about this once per minute */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) mlog(ML_ERROR, "status = %d, journal is "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) "already aborted.\n", status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) * After ocfs2_commit_cache() fails, j_num_trans has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) * non-zero value. Sleep here to avoid a busy-wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) * loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) msleep_interruptible(1000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) mlog(ML_KTHREAD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) "commit_thread: %u transactions pending on "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) "shutdown\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) atomic_read(&journal->j_num_trans));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) /* Reads all the journal inodes without taking any cluster locks. Used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) * for hard readonly access to determine whether any journal requires
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) * recovery. Also used to refresh the recovery generation numbers after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) * a journal has been recovered by another node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) unsigned int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) struct buffer_head *di_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) struct ocfs2_dinode *di;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) int journal_dirty = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) for(slot = 0; slot < osb->max_slots; slot++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) di = (struct ocfs2_dinode *) di_bh->b_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) osb->slot_recovery_generations[slot] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) ocfs2_get_recovery_generation(di);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) if (le32_to_cpu(di->id1.journal1.ij_flags) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) OCFS2_JOURNAL_DIRTY_FL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) journal_dirty = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) brelse(di_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) di_bh = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) if (journal_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) ret = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) }