^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* -*- mode: c; c-basic-offset: 8; -*-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * vim: noexpandtab sw=8 ts=8 sts=0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * dlmrecovery.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * recovery stuff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright (C) 2004 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/random.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/socket.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/inet.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/timer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "../cluster/heartbeat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include "../cluster/nodemanager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include "../cluster/tcp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include "dlmapi.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include "dlmcommon.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include "dlmdomain.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_RECOVERY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include "../cluster/masklog.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
/*
 * Forward declarations of the file-local (static) functions used by the
 * recovery code in this file.
 */
static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);

/* recovery thread entry point and its per-pass worker */
static int dlm_recovery_thread(void *data);
static int dlm_do_recovery(struct dlm_ctxt *dlm);

static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node);
static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node);
static int dlm_request_all_locks(struct dlm_ctxt *dlm,
				 u8 request_from, u8 dead_node);
static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm);

static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res);
static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
					const char *lockname, int namelen,
					int total_locks, u64 cookie,
					u8 flags, u8 master);
static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
				    struct dlm_migratable_lockres *mres,
				    u8 send_to,
				    struct dlm_lock_resource *res,
				    int total_locks);
static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     struct dlm_migratable_lockres *mres);
static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
				 u8 dead_node, u8 send_to);
static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node);
static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
					struct list_head *list, u8 dead_node);
static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
					      u8 dead_node, u8 new_master);
static void dlm_reco_ast(void *astdata);
static void dlm_reco_bast(void *astdata, int blocked_type);
static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st);
/* handlers with the (item, data) signature invoked by dlm_dispatch_work() */
static void dlm_request_all_locks_worker(struct dlm_work_item *item,
					 void *data);
static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data);
static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res,
				      u8 *real_master);

static u64 dlm_get_next_mig_cookie(void);

/*
 * NOTE(review): no user of dlm_reco_state_lock is visible in this part of
 * the file -- presumably it serializes recovery state changes; confirm
 * against the handlers further down.
 */
static DEFINE_SPINLOCK(dlm_reco_state_lock);
/* protects dlm_mig_cookie, see dlm_get_next_mig_cookie() */
static DEFINE_SPINLOCK(dlm_mig_cookie_lock);
/* next migration cookie to hand out; starts at 1 */
static u64 dlm_mig_cookie = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) static u64 dlm_get_next_mig_cookie(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) u64 c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) spin_lock(&dlm_mig_cookie_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) c = dlm_mig_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) if (dlm_mig_cookie == (~0ULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) dlm_mig_cookie = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) dlm_mig_cookie++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) spin_unlock(&dlm_mig_cookie_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) static inline void dlm_set_reco_dead_node(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) if (dlm->reco.dead_node != dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) mlog(0, "%s: changing dead_node from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) dlm->name, dlm->reco.dead_node, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) dlm->reco.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) static inline void dlm_set_reco_master(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) u8 master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) mlog(0, "%s: changing new_master from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) dlm->name, dlm->reco.new_master, master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) dlm->reco.new_master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) clear_bit(dlm->reco.dead_node, dlm->recovery_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) __dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) /* Worker function used during recovery. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) void dlm_dispatch_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) struct dlm_ctxt *dlm =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) container_of(work, struct dlm_ctxt, dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) struct dlm_work_item *item, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) dlm_workfunc_t *workfunc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) int tot=0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) list_splice_init(&dlm->work_list, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) list_for_each_entry(item, &tmp_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) tot++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) list_for_each_entry_safe(item, next, &tmp_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) workfunc = item->func;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) list_del_init(&item->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) /* already have ref on dlm to avoid having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * it disappear. just double-check. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) BUG_ON(item->dlm != dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) /* this is allowed to sleep and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * call network stuff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) workfunc(item, item->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) * RECOVERY THREAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) /* wake the recovery thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) * this will wake the reco thread in one of three places
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) * 1) sleeping with no recovery happening
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) * 2) sleeping with recovery mastered elsewhere
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) * 3) recovery mastered here, waiting on reco data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) wake_up(&dlm->dlm_reco_thread_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) /* Launch the recovery thread */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) int dlm_launch_recovery_thread(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) mlog(0, "starting dlm recovery thread...\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) dlm->dlm_reco_thread_task = kthread_run(dlm_recovery_thread, dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) "dlm_reco-%s", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) if (IS_ERR(dlm->dlm_reco_thread_task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) mlog_errno(PTR_ERR(dlm->dlm_reco_thread_task));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) dlm->dlm_reco_thread_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) if (dlm->dlm_reco_thread_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) mlog(0, "waiting for dlm recovery thread to exit\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) kthread_stop(dlm->dlm_reco_thread_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) dlm->dlm_reco_thread_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
/*
 * this is lame, but here's how recovery works...
 * 1) all recovery threads cluster-wide will work on recovering
 *    ONE node at a time
 * 2) negotiate who will take over all the locks for the dead node.
 *    that's right... ALL the locks.
 * 3) once a new master is chosen, everyone scans all locks
 *    and moves aside those mastered by the dead guy
 * 4) each of these locks should be locked until recovery is done
 * 5) the new master collects up all of secondary lock queue info
 *    one lock at a time, forcing each node to communicate back
 *    before continuing
 * 6) each secondary lock queue responds with the full known lock info
 * 7) once the new master has run all its locks, it sends an ALLDONE!
 *    message to everyone
 * 8) upon receiving this message, the secondary queue node unlocks
 *    and responds to the ALLDONE
 * 9) once the new master gets responses from everyone, he unlocks
 *    everything and recovery for this dead node is done
 *10) go back to 2) while there are still dead nodes
 *
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) struct dlm_reco_node_data *ndata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) char *st = "unknown";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) st = "init";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) st = "requesting";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) st = "dead";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) st = "receiving";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) st = "requested";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) st = "done";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) st = "finalize-sent";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) st = "bad";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) mlog(ML_NOTICE, "%s: reco state, node %u, state=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) dlm->name, ndata->node_num, st);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) list_for_each_entry(res, &dlm->reco.resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) mlog(ML_NOTICE, "%s: lockres %.*s on recovering list\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) #define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) static int dlm_recovery_thread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) unsigned long timeout = msecs_to_jiffies(DLM_RECO_THREAD_TIMEOUT_MS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) mlog(0, "dlm thread running for %s...\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) if (dlm_domain_fully_joined(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) status = dlm_do_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) if (status == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) /* do not sleep, recheck immediately. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) kthread_should_stop(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) mlog(0, "quitting DLM recovery thread\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) /* returns true when the recovery master has contacted us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) static int dlm_reco_master_ready(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) int ready;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) ready = (dlm->reco.new_master != O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) return ready;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) /* returns true if node is no longer in the domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) * could be dead or just not joined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) int dead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) dead = !test_bit(node, dlm->domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) return dead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) /* returns true if node is no longer in the domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * could be dead or just not joined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) int recovered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) recovered = !test_bit(node, dlm->recovery_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) return recovered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) if (dlm_is_node_dead(dlm, node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) "domain %s\n", node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) if (timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) dlm_is_node_dead(dlm, node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) msecs_to_jiffies(timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) wait_event(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) dlm_is_node_dead(dlm, node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) if (dlm_is_node_recovered(dlm, node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) "domain %s\n", node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) if (timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) dlm_is_node_recovered(dlm, node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) msecs_to_jiffies(timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) wait_event(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) dlm_is_node_recovered(dlm, node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) /* callers of the top-level api calls (dlmlock/dlmunlock) should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) * block on the dlm->reco.event when recovery is in progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) * the dlm recovery thread will set this state when it begins
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) * recovering a dead node (as the new master or not) and clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) * the state and wake as soon as all affected lock resources have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) * been marked with the RECOVERY flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) static int dlm_in_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) int in_recovery;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) in_recovery = !!(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) return in_recovery;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) if (dlm_in_recovery(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) mlog(0, "%s: reco thread %d in recovery: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) "state=%d, master=%u, dead=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) dlm->reco.state, dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) wait_event(dlm->reco.event, !dlm_in_recovery(dlm));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) static void dlm_begin_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) dlm->name, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) static void dlm_end_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) wake_up(&dlm->reco.event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) static void dlm_print_recovery_master(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) "dead node %u in domain %s\n", dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) (dlm->node_num == dlm->reco.new_master ? "me" : "he"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) dlm->reco.dead_node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) static int dlm_do_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) if (dlm->migrate_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) mlog(0, "%s: no need do recovery after migrating all "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) "lock resources\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) /* check to see if the new master has died */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) test_bit(dlm->reco.new_master, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) mlog(0, "new master %u died while recovering %u!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) dlm->reco.new_master, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) /* unset the new_master, leave dead_node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) /* select a target to recover */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) if (bit >= O2NM_MAX_NODES || bit < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) dlm_set_reco_dead_node(dlm, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) } else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) /* BUG? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) // mlog(0, "nothing to recover! sleeping now!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) /* return to main thread loop and sleep. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) /* take write barrier */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) /* (stops the list reshuffling thread, proxy ast handling) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) dlm_begin_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) if (dlm->reco.new_master == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) goto master_here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) /* choose a new master, returns 0 if this node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) * is the master, -EEXIST if it's another node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) * this does not return until a new master is chosen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) * or recovery completes entirely. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) ret = dlm_pick_recovery_master(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) /* already notified everyone. go. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) goto master_here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) mlog(0, "another node will master this recovery session.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) dlm_print_recovery_master(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) /* it is safe to start everything back up here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * because all of the dead node's lock resources
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * have been marked as in-recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) dlm_end_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) /* sleep out in main dlm_recovery_thread loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) master_here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) dlm_print_recovery_master(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) /* we should never hit this anymore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) "retrying.\n", dlm->name, status, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) /* yield a bit to allow any final network messages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) * to get handled on remaining nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) /* success! see if any other nodes need recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) dlm->name, dlm->reco.dead_node, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) __dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) dlm_end_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) /* continue and look for another dead node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) struct dlm_reco_node_data *ndata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) int all_nodes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) int destroy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) int pass = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) /* we have become recovery master. there is no escaping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) * this, so just keep trying until we get it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) status = dlm_init_recovery_area(dlm, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) mlog(ML_ERROR, "%s: failed to alloc recovery area, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) "retrying\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) msleep(1000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) } while (status != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) /* safe to access the node data list without a lock, since this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) * process is the only one to change the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) mlog(0, "%s: Requesting lock info from node %u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) if (ndata->node_num == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) ndata->state = DLM_RECO_NODE_DATA_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) status = dlm_request_all_locks(dlm, ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) if (dlm_is_host_down(status)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) /* node died, ignore it for recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) ndata->state = DLM_RECO_NODE_DATA_DEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) /* wait for the domain map to catch up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) * with the network state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) dlm_is_node_dead(dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) ndata->node_num),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) msecs_to_jiffies(1000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) mlog(0, "waited 1 sec for %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) "dead? %s\n", ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) dlm_is_node_dead(dlm, ndata->node_num) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) "yes" : "no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) /* -ENOMEM on the other node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) mlog(0, "%s: node %u returned "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) "%d during recovery, retrying "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) "after a short wait\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) dlm->name, ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) } while (status != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) mlog(0, "node %u died after requesting "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) "recovery info for node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) /* fine. don't need this node's info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) * continue without it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) mlog(0, "now receiving recovery data from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) "node %u for dead node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) mlog(0, "already receiving recovery data from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) "node %u for dead node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) mlog(0, "already DONE receiving recovery data "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) "from node %u for dead node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) mlog(0, "%s: Done requesting all lock info\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) /* nodes should be sending reco data now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) * just need to wait */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) /* check all the nodes now to see if we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) * done, or if anyone died */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) all_nodes_done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) mlog(0, "checking recovery state of node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) mlog(ML_ERROR, "bad ndata state for "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) "node %u: state=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) ndata->node_num, ndata->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) mlog(0, "node %u died after "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) "requesting recovery info for "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) "node %u\n", ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) mlog(0, "%s: node %u still in state %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) dlm->name, ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) ndata->state==DLM_RECO_NODE_DATA_RECEIVING ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) "receiving" : "requested");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) all_nodes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) mlog(0, "%s: node %u state is done\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) dlm->name, ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) mlog(0, "%s: node %u state is finalize\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) dlm->name, ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) mlog(0, "pass #%d, all_nodes_done?: %s\n", ++pass,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) all_nodes_done?"yes":"no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) if (all_nodes_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) /* Set this flag on recovery master to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) * a new recovery for another dead node start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * before the recovery is not done. That may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * cause recovery hung.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) /* all nodes are now in DLM_RECO_NODE_DATA_DONE state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) * just send a finalize message to everyone and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) * clean up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) mlog(0, "all nodes are done! send finalize\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) ret = dlm_send_finalize_reco_message(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) dlm_finish_local_lockres_recovery(dlm, dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) mlog(0, "should be done with recovery!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) mlog(0, "finishing recovery of %s at %lu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) "dead=%u, this=%u, new=%u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) jiffies, dlm->reco.dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) dlm->node_num, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) destroy = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) /* rescan everything marked dirty along the way */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) dlm_kick_thread(dlm, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) /* wait to be signalled, with periodic timeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) * to check for node death */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) kthread_should_stop(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) msecs_to_jiffies(DLM_RECO_THREAD_TIMEOUT_MS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) if (destroy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) dlm_destroy_recovery_area(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) int num=0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) struct dlm_reco_node_data *ndata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) memcpy(dlm->reco.node_map, dlm->domain_map, sizeof(dlm->domain_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) /* nodes can only be removed (by dying) after dropping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * this lock, and death will be trapped later, so this should do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) num = find_next_bit (dlm->reco.node_map, O2NM_MAX_NODES, num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) if (num >= O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) BUG_ON(num == dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) ndata = kzalloc(sizeof(*ndata), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) if (!ndata) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) dlm_destroy_recovery_area(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) ndata->node_num = num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) ndata->state = DLM_RECO_NODE_DATA_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) list_add_tail(&ndata->list, &dlm->reco.node_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) num++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) struct dlm_reco_node_data *ndata, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) LIST_HEAD(tmplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) list_splice_init(&dlm->reco.node_data, &tmplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) list_for_each_entry_safe(ndata, next, &tmplist, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) list_del_init(&ndata->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) kfree(ndata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) struct dlm_lock_request lr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) mlog(0, "\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) mlog(0, "dlm_request_all_locks: dead node is %u, sending request "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) "to %u\n", dead_node, request_from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) memset(&lr, 0, sizeof(lr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) lr.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) lr.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) // send message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) &lr, sizeof(lr), request_from, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) /* negative status is handled by caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) "to recover dead node %u\n", dlm->name, ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) request_from, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) // return from here, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) // sleep until all received or error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) char *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) struct dlm_work_item *item = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) if (lr->dead_node != dlm->reco.dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) mlog(ML_ERROR, "%s: node %u sent dead_node=%u, but local "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) "dead_node is %u\n", dlm->name, lr->node_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) lr->dead_node, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) dlm_print_reco_node_status(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) /* this is a hack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) BUG_ON(lr->dead_node != dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) item = kzalloc(sizeof(*item), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (!item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) /* this will get freed by dlm_request_all_locks_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) buf = (char *) __get_free_page(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (!buf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) /* queue up work for dlm_request_all_locks_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) dlm_grab(dlm); /* get an extra ref for the work item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) dlm_init_work_item(dlm, item, dlm_request_all_locks_worker, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) item->u.ral.reco_master = lr->node_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) item->u.ral.dead_node = lr->dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) list_add_tail(&item->list, &dlm->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) queue_work(dlm->dlm_worker, &dlm->dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) struct dlm_migratable_lockres *mres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) struct dlm_ctxt *dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) LIST_HEAD(resources);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) u8 dead_node, reco_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) int skip_all_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) dlm = item->dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) dead_node = item->u.ral.dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) reco_master = item->u.ral.reco_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) mres = (struct dlm_migratable_lockres *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) mlog(0, "%s: recovery worker started, dead=%u, master=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) dlm->name, dead_node, reco_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (dead_node != dlm->reco.dead_node ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) reco_master != dlm->reco.new_master) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) /* worker could have been created before the recovery master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) * died. if so, do not continue, but do not error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) mlog(ML_NOTICE, "%s: will not send recovery state, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) "recovery master %u died, thread=(dead=%u,mas=%u)"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) " current=(dead=%u,mas=%u)\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) reco_master, dead_node, reco_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) "master=%u), request(dead=%u, master=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) dlm->name, dlm->reco.dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) dlm->reco.new_master, dead_node, reco_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /* lock resources should have already been moved to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * dlm->reco.resources list. now move items from that list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * to a temp list if the dead owner matches. note that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) * whole cluster recovers only one node at a time, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * can safely move UNKNOWN lock resources for each recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * session. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) /* now we can begin blasting lockreses without the dlm lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) /* any errors returned will be due to the new_master dying,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) * the dlm_reco_thread should detect this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) list_for_each_entry(res, &resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) DLM_MRES_RECOVERY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) mlog(ML_ERROR, "%s: node %u went down while sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) "recovery state for dead node %u, ret=%d\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) reco_master, dead_node, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) skip_all_done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) /* move the resources back to the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) list_splice_init(&resources, &dlm->reco.resources);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (!skip_all_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) mlog(ML_ERROR, "%s: node %u went down while sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) "recovery all-done for dead node %u, ret=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) dlm->name, reco_master, dead_node, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) free_page((unsigned long)data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) int ret, tmpret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) struct dlm_reco_data_done done_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) memset(&done_msg, 0, sizeof(done_msg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) done_msg.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) done_msg.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) mlog(0, "sending DATA DONE message to %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) "my node=%u, dead node=%u\n", send_to, done_msg.node_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) done_msg.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) sizeof(done_msg), send_to, &tmpret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) "to recover dead node %u\n", dlm->name, ret, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (!dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) ret = tmpret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) struct dlm_reco_node_data *ndata = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) "node_idx=%u, this node=%u\n", done->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) dlm->reco.dead_node, done->node_idx, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) mlog_bug_on_msg((done->dead_node != dlm->reco.dead_node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) "Got DATA DONE: dead_node=%u, reco.dead_node=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) "node_idx=%u, this node=%u\n", done->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) dlm->reco.dead_node, done->node_idx, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (ndata->node_num != done->node_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /* should have moved beyond INIT but not to FINALIZE yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) mlog(ML_ERROR, "bad ndata state for node %u:"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) " state=%d\n", ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) ndata->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) /* these states are possible at this point, anywhere along
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) * the line of recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) mlog(0, "node %u is DONE sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) "recovery data!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) ndata->state = DLM_RECO_NODE_DATA_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) /* wake the recovery thread, some node is done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) mlog(ML_ERROR, "failed to find recovery node data for node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) "%u\n", done->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) mlog(0, "leaving reco data done handler, ret=%d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) struct list_head *list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) struct dlm_lock_resource *res, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) /* always prune any $RECOVERY entries for dead nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * otherwise hangs can occur during later recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (dlm_is_recovery_lock(res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) res->lockname.len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) list_for_each_entry(lock, &res->granted, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) mlog(0, "AHA! there was "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) "a $RECOVERY lock for dead "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) "node %u (%s)!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) dead_node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) /* Can't schedule DLM_UNLOCK_FREE_LOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) if (res->owner == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) mlog(0, "found lockres owned by dead node while "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) "doing recovery for node %u. sending it.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) list_move_tail(&res->recovering, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) mlog(0, "found UNKNOWN owner while doing recovery "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) "for node %u. sending it.\n", dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) list_move_tail(&res->recovering, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) int total_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) struct list_head *iter, *queue = &res->granted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) for (i=0; i<3; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) list_for_each(iter, queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) total_locks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) queue++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) return total_locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) u8 send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) int total_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) u64 mig_cookie = be64_to_cpu(mres->mig_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) int mres_total_locks = be32_to_cpu(mres->total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) int ret = 0, status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) u8 orig_flags = mres->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) orig_master = mres->master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) BUG_ON(mres->num_locks > DLM_MAX_MIGRATABLE_LOCKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if (!mres->num_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) /* add an all-done flag if we reached the last lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) orig_flags = mres->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) BUG_ON(total_locks > mres_total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) if (total_locks == mres_total_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) mres->flags |= DLM_MRES_ALL_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) /* send it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) struct_size(mres, ml, mres->num_locks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) send_to, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) /* XXX: negative status is not handled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * this will end up killing this node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) "node %u (%s)\n", dlm->name, mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) mres->lockname, ret, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) (orig_flags & DLM_MRES_MIGRATION ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) "migration" : "recovery"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) /* might get an -ENOMEM back here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) if (ret == -EFAULT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) mlog(ML_ERROR, "node %u told me to kill "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) "myself!\n", send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /* zero and reinit the message buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) dlm_init_migratable_lockres(mres, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) res->lockname.len, mres_total_locks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) mig_cookie, orig_flags, orig_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) const char *lockname, int namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) int total_locks, u64 cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) u8 flags, u8 master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) /* mres here is one full page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) clear_page(mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) mres->lockname_len = namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) memcpy(mres->lockname, lockname, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) mres->num_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) mres->total_locks = cpu_to_be32(total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) mres->mig_cookie = cpu_to_be64(cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) mres->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) mres->master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) int queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (!lock->lksb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) /* Ignore lvb in all locks in the blocked list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) if (queue == DLM_BLOCKED_LIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) /* Only consider lvbs in locks with granted EX or PR lock levels */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (dlm_lvb_is_empty(mres->lvb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) /* Ensure the lvb copied for migration matches in other valid locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) "node=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) lock->lockres->lockname.len, lock->lockres->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) lock->ml.node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) dlm_print_one_lock_resource(lock->lockres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) /* returns 1 if this lock fills the network structure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * 0 otherwise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) static int dlm_add_lock_to_array(struct dlm_lock *lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) struct dlm_migratable_lockres *mres, int queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) struct dlm_migratable_lock *ml;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) int lock_num = mres->num_locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) ml = &(mres->ml[lock_num]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) ml->cookie = lock->ml.cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) ml->type = lock->ml.type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) ml->convert_type = lock->ml.convert_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) ml->highest_blocked = lock->ml.highest_blocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) ml->list = queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) if (lock->lksb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) ml->flags = lock->lksb->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) dlm_prepare_lvb_for_migration(lock, mres, queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) ml->node = lock->ml.node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) mres->num_locks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) /* we reached the max, send this network message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) if (mres->num_locks == DLM_MAX_MIGRATABLE_LOCKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) static void dlm_add_dummy_lock(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) struct dlm_migratable_lockres *mres)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) struct dlm_lock dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) memset(&dummy, 0, sizeof(dummy));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) dummy.ml.cookie = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) dummy.ml.type = LKM_IVMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) dummy.ml.convert_type = LKM_IVMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) dummy.ml.highest_blocked = LKM_IVMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) dummy.lksb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) dummy.ml.node = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) struct dlm_migratable_lock *ml,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) u8 *nodenum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) if (unlikely(ml->cookie == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) ml->type == LKM_IVMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) ml->convert_type == LKM_IVMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) ml->highest_blocked == LKM_IVMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) ml->list == DLM_BLOCKED_LIST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) *nodenum = ml->node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) u8 send_to, u8 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) int total_locks, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) u64 mig_cookie = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) BUG_ON(!(flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) mlog(0, "sending to %u\n", send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) total_locks = dlm_num_locks_in_lockres(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (total_locks > DLM_MAX_MIGRATABLE_LOCKS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) /* rare, but possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) mlog(0, "argh. lockres has %d locks. this will "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) "require more than one network packet to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) "migrate\n", total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) mig_cookie = dlm_get_next_mig_cookie();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) dlm_init_migratable_lockres(mres, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) res->lockname.len, total_locks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) mig_cookie, flags, res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) total_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) queue = dlm_list_idx_to_ptr(res, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /* add another lock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) total_locks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) if (!dlm_add_lock_to_array(lock, mres, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) /* this filled the lock message,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * we must send it immediately. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) res, total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) if (total_locks == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) /* send a dummy lock to indicate a mastery reference only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) send_to, flags & DLM_MRES_RECOVERY ? "recovery" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) "migration");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) dlm_add_dummy_lock(dlm, mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* flush any remaining locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) dlm->name, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) if (!dlm_is_host_down(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) mlog(0, "%s: node %u went down while sending %s "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) "lockres %.*s\n", dlm->name, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) flags & DLM_MRES_RECOVERY ? "recovery" : "migration",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * this message will contain no more than one page worth of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * recovery data, and it will work on only one lockres.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * there may be many locks in this page, and we may need to wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) * for additional packets to complete all the locks (rare, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * possible).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) * NOTE: the allocation error cases here are scary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) * we really cannot afford to fail an alloc in recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) * do we spin? returning an error only delays the problem really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) struct dlm_migratable_lockres *mres =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) (struct dlm_migratable_lockres *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) u8 real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) u8 extra_refs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) char *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) struct dlm_work_item *item = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) if (!dlm_joined(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) mlog(ML_ERROR, "Domain %s not joined! "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) "lockres %.*s, master %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) dlm->name, mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) mres->lockname, mres->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) real_master = mres->master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) /* cannot migrate a lockres with no master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) mlog(0, "%s message received from node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) (mres->flags & DLM_MRES_RECOVERY) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) "recovery" : "migration", mres->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) if (mres->flags & DLM_MRES_ALL_DONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) mlog(0, "all done flag. all lockres data received!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) item = kzalloc(sizeof(*item), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) if (!buf || !item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) /* lookup the lock to see if we have a secondary queue for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) * already... just add the locks in and this will have its owner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) * and RECOVERY flag changed when it completes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) hash = dlm_lockid_hash(mres->lockname, mres->lockname_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) res = __dlm_lookup_lockres_full(dlm, mres->lockname, mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) /* this will get a ref on res */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /* mark it as recovering/migrating and hash it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if (res->state & DLM_LOCK_RES_DROPPING_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) mlog(0, "%s: node is attempting to migrate "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) "lockres %.*s, but marked as dropping "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) " ref!\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) mres->lockname_len, mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) if (mres->flags & DLM_MRES_RECOVERY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) res->state |= DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) if (res->state & DLM_LOCK_RES_MIGRATING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) /* this is at least the second
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) * lockres message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) mlog(0, "lock %.*s is already migrating\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) } else if (res->state & DLM_LOCK_RES_RECOVERING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) /* caller should BUG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) mlog(ML_ERROR, "node is attempting to migrate "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) "lock %.*s, but marked as recovering!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) mres->lockname_len, mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) res->state |= DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) /* need to allocate, just like if it was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * mastered here normally */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) if (!res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) /* to match the ref that we would have gotten if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * dlm_lookup_lockres had succeeded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) /* mark it as recovering/migrating and hash it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) if (mres->flags & DLM_MRES_RECOVERY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) res->state |= DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) res->state |= DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) __dlm_insert_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) /* Add an extra ref for this lock-less lockres lest the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * dlm_thread purges it before we get the chance to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * locks to it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) /* There are three refs that need to be put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) * 1. Taken above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) * 2. kref_init in dlm_new_lockres()->dlm_init_lockres().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) * 3. dlm_lookup_lockres()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * The first one is handled at the end of this function. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * other two are handled in the worker thread after locks have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) * been attached. Yes, we don't wait for purge time to match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) * kref_init. The lockres will still have atleast one ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) * added because it is in the hash __dlm_insert_lockres() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) extra_refs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) /* now that the new lockres is inserted,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) * make it usable by other processes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) /* at this point we have allocated everything we need,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) * and we have a hashed lockres with an extra ref and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * the proper res->state flags. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) /* drop this either when master requery finds a different master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * or when a lock is added by the recovery worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) dlm_lockres_grab_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) /* migration cannot have an unknown master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) mlog(0, "recovery has passed me a lockres with an "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) "unknown owner.. will need to requery: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) "%.*s\n", mres->lockname_len, mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) /* take a reference now to pin the lockres, drop it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) * when locks are added in the worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) dlm_change_lockres_owner(dlm, res, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) /* queue up work for dlm_mig_lockres_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) dlm_grab(dlm); /* get an extra ref for the work item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) memcpy(buf, msg->buf, be16_to_cpu(msg->data_len)); /* copy the whole message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) item->u.ml.lockres = res; /* already have a ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) item->u.ml.real_master = real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) item->u.ml.extra_ref = extra_refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) list_add_tail(&item->list, &dlm->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) queue_work(dlm->dlm_worker, &dlm->dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) /* One extra ref taken needs to be put here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) if (extra_refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) struct dlm_ctxt *dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) struct dlm_migratable_lockres *mres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) u8 real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) u8 extra_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) dlm = item->dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) mres = (struct dlm_migratable_lockres *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) res = item->u.ml.lockres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) real_master = item->u.ml.real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) extra_ref = item->u.ml.extra_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) /* this case is super-rare. only occurs if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) * node death happens during migration. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) ret = dlm_lockres_master_requery(dlm, res, &real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) mlog(0, "dlm_lockres_master_requery ret=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) mlog(0, "lockres %.*s not claimed. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) "this node will take it.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) dlm_lockres_drop_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) mlog(0, "master needs to respond to sender "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) "that node %u still owns %.*s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) real_master, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) /* cannot touch this lockres */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) ret = dlm_process_recovery_data(dlm, res, mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) mlog(0, "dlm_process_recovery_data returned %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) mlog(0, "dlm_process_recovery_data succeeded\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) if ((mres->flags & (DLM_MRES_MIGRATION|DLM_MRES_ALL_DONE)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) (DLM_MRES_MIGRATION|DLM_MRES_ALL_DONE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) ret = dlm_finish_migration(dlm, res, mres->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) /* See comment in dlm_mig_lockres_handler() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) if (extra_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) kfree(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) u8 *real_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) *real_master = DLM_LOCK_RES_OWNER_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) /* we only reach here if one of the two nodes in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) * migration died while the migration was in progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * at this point we need to requery the master. we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) * know that the new_master got as far as creating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * an mle on at least one node, but we do not know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) * if any nodes had actually cleared the mle and set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * the master to the new_master. the old master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) * is supposed to set the owner to UNKNOWN in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) * event of a new_master death, so the only possible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) * responses that we can get from nodes here are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * that the master is new_master, or that the master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * is UNKNOWN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) * if all nodes come back with UNKNOWN then we know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) * the lock needs remastering here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) * if any node comes back with a valid master, check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * to see if that master is the one that we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * recovering. if so, then the new_master died and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) * we need to remaster this lock. if not, then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) * new_master survived and that node will respond to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) * other nodes about the owner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) * if there is an owner, this node needs to dump this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) * lockres and alert the sender that this lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) * was rejected. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) /* do not send to self */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) if (nodenum == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) if (!dlm_is_host_down(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) /* host is down, so answer for that node would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) mlog(0, "lock master is %u\n", *real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) u8 nodenum, u8 *real_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) struct dlm_master_requery req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) int status = DLM_LOCK_RES_OWNER_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) memset(&req, 0, sizeof(req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) req.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) req.namelen = res->lockname.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) memcpy(req.name, res->lockname.name, res->lockname.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) resend:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) &req, sizeof(req), nodenum, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) mlog(ML_ERROR, "Error %d when sending message %u (key "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) dlm->key, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) else if (status == -ENOMEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) msleep(50);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) goto resend;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) BUG_ON(status < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) *real_master = (u8) (status & 0xff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) mlog(0, "node %u responded to master requery with %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) nodenum, *real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /* this function cannot error, so unless the sending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) * or receiving of the message failed, the owner can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) * be trusted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) int master = DLM_LOCK_RES_OWNER_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) u32 flags = DLM_ASSERT_MASTER_REQUERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) int dispatched = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) if (!dlm_grab(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) /* since the domain has gone away on this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) * node, the proper response is UNKNOWN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) return master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) hash = dlm_lockid_hash(req->name, req->namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) master = res->owner;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) if (master == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) int ret = dlm_dispatch_assert_master(dlm, res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 0, 0, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) /* sender will take care of this and retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) dispatched = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) __dlm_lockres_grab_inflight_worker(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) /* put.. incase we are not the master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (!dispatched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) return master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
/*
 * Translate the queue index carried in a migratable lock into the matching
 * queue head on @res.
 *
 * @list_num must be 0, 1 or 2; anything else is treated as a fatal
 * inconsistency (BUG).
 *
 * The lookup goes through dlm_list_idx_to_ptr(), the same helper the rest of
 * this file uses to walk the lock queues, rather than stepping a pointer
 * from &res->granted across neighbouring struct members.  Pointer arithmetic
 * past a single object is not defined by C and would silently depend on the
 * declaration order of the queue heads.
 */
static inline struct list_head *
dlm_list_num_to_pointer(struct dlm_lock_resource *res, int list_num)
{
	BUG_ON(list_num < 0);
	BUG_ON(list_num > 2);

	return dlm_list_idx_to_ptr(res, list_num);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) /* TODO: do ast flush business
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) * TODO: do MIGRATING and RECOVERING spinning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * NOTE about in-flight requests during migration:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) * Before attempting the migrate, the master has marked the lockres as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) * MIGRATING and then flushed all of its pending ASTS. So any in-flight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) * requests either got queued before the MIGRATING flag got set, in which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) * case the lock data will reflect the change and a return message is on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) * the way, or the request failed to get in before MIGRATING got set. In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) * this case, the caller will be told to spin and wait for the MIGRATING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) * flag to be dropped, then recheck the master.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * This holds true for the convert, cancel and unlock cases, and since lvb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * updates are tied to these same messages, it applies to lvb updates as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) * well. For the lock case, there is no way a lock can be on the master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) * queue and not be on the secondary queue since the lock is always added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) * locally first. This means that the new target node will never be sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) * a lock that he doesn't already have on the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) * In total, this means that the local lock is correct and should not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) * updated to match the one sent by the master. Any messages sent back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * from the master before the MIGRATING flag will bring the lock properly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) * up-to-date, and the change will be ordered properly for the waiter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) * We will *not* attempt to modify the lock underneath the waiter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) struct dlm_migratable_lockres *mres)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) struct dlm_migratable_lock *ml;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) struct list_head *queue, *iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) struct list_head *tmpq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) struct dlm_lock *newlock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) struct dlm_lockstatus *lksb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) int i, j, bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) u8 from = O2NM_MAX_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) __be64 c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) mlog(0, "running %d locks for this lockres\n", mres->num_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) for (i=0; i<mres->num_locks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) ml = &(mres->ml[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) if (dlm_is_dummy_lock(dlm, ml, &from)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) /* placeholder, just need to set the refmap bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) BUG_ON(mres->num_locks != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) mlog(0, "%s:%.*s: dummy lock for %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) dlm->name, mres->lockname_len, mres->lockname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) dlm_lockres_set_refmap_bit(dlm, res, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) BUG_ON(ml->highest_blocked != LKM_IVMODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) newlock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) lksb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) queue = dlm_list_num_to_pointer(res, ml->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) tmpq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) /* if the lock is for the local node it needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) * be moved to the proper location within the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) * do not allocate a new lock structure. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) if (ml->node == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) /* MIGRATION ONLY! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) lock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) tmpq = dlm_list_idx_to_ptr(res, j);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) list_for_each(iter, tmpq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) lock = list_entry(iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) struct dlm_lock, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) if (lock->ml.cookie == ml->cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) lock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) if (lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) /* lock is always created locally first, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) * destroyed locally last. it must be on the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) if (!lock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) c = ml->cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) mlog(ML_ERROR, "Could not find local lock "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) "with cookie %u:%llu, node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) "list %u, flags 0x%x, type %d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) "conv %d, highest blocked %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) dlm_get_lock_cookie_node(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) dlm_get_lock_cookie_seq(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) ml->node, ml->list, ml->flags, ml->type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) ml->convert_type, ml->highest_blocked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) __dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) if (lock->ml.node != ml->node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) c = lock->ml.cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) mlog(ML_ERROR, "Mismatched node# in lock "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) "cookie %u:%llu, name %.*s, node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) dlm_get_lock_cookie_node(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) dlm_get_lock_cookie_seq(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) lock->ml.node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) c = ml->cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) "node %u, list %u, flags 0x%x, type %d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) "conv %d, highest blocked %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) dlm_get_lock_cookie_node(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) dlm_get_lock_cookie_seq(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) ml->node, ml->list, ml->flags, ml->type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) ml->convert_type, ml->highest_blocked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) __dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (tmpq != queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) c = ml->cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) mlog(0, "Lock cookie %u:%llu was on list %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) "instead of list %u for %.*s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) dlm_get_lock_cookie_node(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) dlm_get_lock_cookie_seq(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) j, ml->list, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) __dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) /* see NOTE above about why we do not update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) * to match the master here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) /* move the lock to its proper place */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) /* do not alter lock refcount. switching lists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) list_move_tail(&lock->list, queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) mlog(0, "just reordered a local lock!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) /* lock is for another node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) newlock = dlm_new_lock(ml->type, ml->node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) be64_to_cpu(ml->cookie), NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) if (!newlock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) lksb = newlock->lksb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) dlm_lock_attach_lockres(newlock, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) if (ml->convert_type != LKM_IVMODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) BUG_ON(queue != &res->converting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) newlock->ml.convert_type = ml->convert_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) lksb->flags |= (ml->flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) if (ml->type == LKM_NLMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) goto skip_lvb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) * If the lock is in the blocked list it can't have a valid lvb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) * so skip it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) if (ml->list == DLM_BLOCKED_LIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) goto skip_lvb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) if (!dlm_lvb_is_empty(mres->lvb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) if (lksb->flags & DLM_LKSB_PUT_LVB) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) /* other node was trying to update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) * lvb when node died. recreate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) * lksb with the updated lvb. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) /* the lock resource lvb update must happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) * NOW, before the spinlock is dropped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) * we no longer wait for the AST to update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) * the lvb. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /* otherwise, the node is sending its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) * most recent valid lvb info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) BUG_ON(ml->type != LKM_EXMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) ml->type != LKM_PRMODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) if (!dlm_lvb_is_empty(res->lvb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) (ml->type == LKM_EXMODE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) mlog(ML_ERROR, "%s:%.*s: received bad "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) "lvb! type=%d\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) res->lockname.name, ml->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) printk("lockres lvb=[");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) for (i=0; i<DLM_LVB_LEN; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) printk("%02x", res->lvb[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) printk("]\nmigrated lvb=[");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) for (i=0; i<DLM_LVB_LEN; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) printk("%02x", mres->lvb[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) printk("]\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) skip_lvb:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /* NOTE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) * wrt lock queue ordering and recovery:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) * 1. order of locks on granted queue is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) * meaningless.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) * 2. order of locks on converting queue is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * LOST with the node death. sorry charlie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) * 3. order of locks on the blocked queue is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) * also LOST.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) * order of locks does not affect integrity, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) * just means that a lock request may get pushed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) * back in line as a result of the node death.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) * also note that for a given node the lock order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) * for its secondary queue locks is preserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) * relative to each other, but clearly *not*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) * preserved relative to locks from other nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) bad = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) if (lock->ml.cookie == ml->cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) c = lock->ml.cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) "exists on this lockres!\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) dlm_get_lock_cookie_node(be64_to_cpu(c)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) dlm_get_lock_cookie_seq(be64_to_cpu(c)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) "node=%u, cookie=%u:%llu, queue=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) ml->type, ml->convert_type, ml->node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) ml->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) __dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) bad = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) if (!bad) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) dlm_lock_get(newlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (mres->flags & DLM_MRES_RECOVERY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) ml->list == DLM_CONVERTING_LIST &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) newlock->ml.type >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) newlock->ml.convert_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) /* newlock is doing downconvert, add it to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) * head of converting list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) list_add(&newlock->list, queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) list_add_tail(&newlock->list, queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) mlog(0, "%s:%.*s: added lock for node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) "setting refmap bit\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) res->lockname.len, res->lockname.name, ml->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) dlm_lockres_set_refmap_bit(dlm, res, ml->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) mlog(0, "done running all the locks\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) /* balance the ref taken when the work was queued */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) dlm_lockres_drop_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) struct dlm_lock *lock, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) res->state |= DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) if (!list_empty(&res->recovering)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) mlog(0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) "Recovering res %s:%.*s, is already on recovery list!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) list_del_init(&res->recovering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) /* We need to hold a reference while on the recovery list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) list_add_tail(&res->recovering, &dlm->reco.resources);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) /* find any pending locks and put them back on proper list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) queue = dlm_list_idx_to_ptr(res, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) list_for_each_entry_safe(lock, next, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) dlm_lock_get(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) if (lock->convert_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) /* move converting lock back to granted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) mlog(0, "node died with convert pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) "on %.*s. move back to granted list.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) dlm_revert_pending_convert(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) lock->convert_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) } else if (lock->lock_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) /* remove pending lock requests completely */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) BUG_ON(i != DLM_BLOCKED_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) mlog(0, "node died with lock pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) "on %.*s. remove from blocked list and skip.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) /* lock will be floating until ref in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * dlmlock_remote is freed after the network
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) * call returns. ok for it to not be on any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) * list since no ast can be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) * (the master is dead). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) dlm_revert_pending_lock(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) lock->lock_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) } else if (lock->unlock_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) /* if an unlock was in progress, treat as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) * if this had completed successfully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) * before sending this lock state to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) * new master. note that the dlm_unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) * call is still responsible for calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) * the unlockast. that will happen after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) * the network call times out. for now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) * just move lists to prepare the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) * recovery master. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) BUG_ON(i != DLM_GRANTED_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) mlog(0, "node died with unlock pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) "on %.*s. remove from blocked list and skip.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) dlm_commit_pending_unlock(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) lock->unlock_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) } else if (lock->cancel_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) /* if a cancel was in progress, treat as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) * if this had completed successfully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) * before sending this lock state to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) * new master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) BUG_ON(i != DLM_CONVERTING_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) mlog(0, "node died with cancel pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) "on %.*s. move back to granted list.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) dlm_commit_pending_cancel(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) lock->cancel_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) /* removes all recovered locks from the recovery list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) * sets the res->owner to the new master.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) * unsets the RECOVERY flag and wakes waiters. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) u8 dead_node, u8 new_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) struct hlist_head *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) struct dlm_lock_resource *res, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) if (res->owner == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) mlog(0, "%s: res %.*s, Changing owner from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) res->owner, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) list_del_init(&res->recovering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) /* new_master has our reference from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) * the lock state sent during recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) dlm_change_lockres_owner(dlm, res, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) res->state &= ~DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) if (__dlm_lockres_has_locks(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) __dlm_dirty_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) /* this will become unnecessary eventually, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * for now we need to run the whole hash, clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) * the RECOVERING state and set the owner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) * if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) for (i = 0; i < DLM_HASH_BUCKETS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) bucket = dlm_lockres_hash(dlm, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) hlist_for_each_entry(res, bucket, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) if (res->state & DLM_LOCK_RES_RECOVERY_WAITING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) res->state &= ~DLM_LOCK_RES_RECOVERY_WAITING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) if (!(res->state & DLM_LOCK_RES_RECOVERING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) if (res->owner != dead_node &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) res->owner != dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) if (!list_empty(&res->recovering)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) list_del_init(&res->recovering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) /* new_master has our reference from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * the lock state sent during recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) mlog(0, "%s: res %.*s, Changing owner from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) res->owner, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) dlm_change_lockres_owner(dlm, res, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) res->state &= ~DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) if (__dlm_lockres_has_locks(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) __dlm_dirty_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) if (local) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) if (lock->ml.type != LKM_EXMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) lock->ml.type != LKM_PRMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) } else if (lock->ml.type == LKM_EXMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) struct dlm_lock_resource *res, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) int blank_lvb = 0, local = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) u8 search_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) if (res->owner == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) /* if this node owned the lockres, and if the dead node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) * had an EX when he died, blank out the lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) search_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) /* if this is a secondary lockres, and we had no EX or PR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) * locks granted, we can no longer trust the lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) search_node = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) local = 1; /* check local state for valid lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) queue = dlm_list_idx_to_ptr(res, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) if (lock->ml.node == search_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) if (dlm_lvb_needs_invalidation(lock, local)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) /* zero the lksb lvb and lockres lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) blank_lvb = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) memset(lock->lksb->lvb, 0, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) if (blank_lvb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) mlog(0, "clearing %.*s lvb, dead node %u had EX\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) res->lockname.len, res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) memset(res->lvb, 0, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) struct dlm_lock_resource *res, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) struct dlm_lock *lock, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) unsigned int freed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) /* this node is the lockres master:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) * 1) remove any stale locks for the dead node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * 2) if the dead node had an EX when he died, blank out the lvb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) /* We do two dlm_lock_put(). One for removing from list and the other is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) /* TODO: check pending_asts, pending_basts here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) list_for_each_entry_safe(lock, next, &res->granted, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) list_for_each_entry_safe(lock, next, &res->converting, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) list_for_each_entry_safe(lock, next, &res->blocked, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) if (freed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) "dropping ref from lockres\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) res->lockname.len, res->lockname.name, freed, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) if(!test_bit(dead_node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) "but ref was not set\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) res->lockname.len, res->lockname.name, freed, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) __dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) res->state |= DLM_LOCK_RES_RECOVERY_WAITING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) } else if (test_bit(dead_node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) mlog(0, "%s:%.*s: dead node %u had a ref, but had "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) "no locks and had not purged before dying\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) res->lockname.len, res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) /* do not kick thread yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) __dlm_dirty_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) struct hlist_head *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) struct hlist_node *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) /* purge any stale mles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) dlm_clean_master_list(dlm, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) * now clean up all lock resources. there are two rules:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) * 1) if the dead node was the master, move the lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) * to the recovering list. set the RECOVERING flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) * this lockres needs to be cleaned up before it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) * be used further.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) * 2) if this node was the master, remove all locks from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) * each of the lockres queues that were owned by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) * dead node. once recovery finishes, the dlm thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) * can be kicked again to see if any ASTs or BASTs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) * need to be fired as a result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) for (i = 0; i < DLM_HASH_BUCKETS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) bucket = dlm_lockres_hash(dlm, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) hlist_for_each_entry_safe(res, tmp, bucket, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) /* always prune any $RECOVERY entries for dead nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) * otherwise hangs can occur during later recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) if (dlm_is_recovery_lock(res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) res->lockname.len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) list_for_each_entry(lock, &res->granted, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) mlog(0, "AHA! there was "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) "a $RECOVERY lock for dead "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) "node %u (%s)!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) dead_node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) /* Can't schedule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) * DLM_UNLOCK_FREE_LOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) * - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) if ((res->owner == dead_node) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) (res->state & DLM_LOCK_RES_DROPPING_REF)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) __dlm_do_purge_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) } else if (res->owner == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) /* zero the lvb if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) dlm_revalidate_lvb(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) if (res->owner == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (res->state & DLM_LOCK_RES_DROPPING_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) mlog(0, "%s:%.*s: owned by "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) "dead node %u, this node was "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) "dropping its ref when master died. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) "continue, purging the lockres.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) __dlm_do_purge_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) dlm_move_lockres_to_recovery_list(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) } else if (res->owner == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) dlm_free_dead_locks(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) __dlm_lockres_calc_usage(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) if (test_bit(dead_node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) mlog(0, "%s:%.*s: dead node %u had a ref, but had "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) "no locks and had not purged before dying\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) if (dlm->reco.new_master == idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) mlog(0, "%s: recovery master %d just died\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) dlm->name, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) /* finalize1 was reached, so it is safe to clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) * the new_master and dead_node. that recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) * is complete. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) mlog(0, "%s: dead master %d had reached "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) "finalize1 state, clearing\n", dlm->name, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) __dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) /* Clean up join state on node death. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) if (dlm->joining_node == idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) mlog(0, "Clearing join state for node %u\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) /* check to see if the node is already considered dead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) if (!test_bit(idx, dlm->live_nodes_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) mlog(0, "for domain %s, node %d is already dead. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) "another node likely did recovery already.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) dlm->name, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) /* check to see if we do not care about this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) if (!test_bit(idx, dlm->domain_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) /* This also catches the case that we get a node down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) * but haven't joined the domain yet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) mlog(0, "node %u already removed from domain!\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) clear_bit(idx, dlm->live_nodes_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) /* make sure local cleanup occurs before the heartbeat events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) if (!test_bit(idx, dlm->recovery_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) dlm_do_local_recovery_cleanup(dlm, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) /* notify anything attached to the heartbeat events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) dlm_hb_event_notify_attached(dlm, idx, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) mlog(0, "node %u being removed from domain map!\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) clear_bit(idx, dlm->domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) clear_bit(idx, dlm->exit_domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) /* wake up migration waiters if a node goes down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) * perhaps later we can genericize this for other waiters. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) wake_up(&dlm->migration_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) set_bit(idx, dlm->recovery_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) * This will notify any dlm users that a node in our domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) * went away without notifying us first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) if (test_bit(idx, dlm->domain_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) dlm_fire_domain_eviction_callbacks(dlm, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) __dlm_hb_node_down(dlm, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) set_bit(idx, dlm->live_nodes_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) /* do NOT notify mle attached to the heartbeat events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) * new nodes are not interesting in mastery until joined. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) static void dlm_reco_ast(void *astdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) struct dlm_ctxt *dlm = astdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) mlog(0, "ast for recovery lock fired!, this=%u, dlm=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) dlm->node_num, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) static void dlm_reco_bast(void *astdata, int blocked_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) struct dlm_ctxt *dlm = astdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) mlog(0, "bast for recovery lock fired!, this=%u, dlm=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) dlm->node_num, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) mlog(0, "unlockast for recovery lock fired!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) * dlm_pick_recovery_master will continually attempt to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) * dlmlock() on the special "$RECOVERY" lockres with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) * LKM_NOQUEUE flag to get an EX. every thread that enters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) * this function on each node racing to become the recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) * master will not stop attempting this until either:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) * a) this node gets the EX (and becomes the recovery master),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) * or b) dlm->reco.new_master gets set to some nodenum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) * != O2NM_INVALID_NODE_NUM (another node will do the reco).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) * so each time a recovery master is needed, the entire cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * will sync at this point. if the new master dies, that will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * be detected in dlm_do_recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) enum dlm_status ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) struct dlm_lockstatus lksb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) int status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) memset(&lksb, 0, sizeof(lksb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) dlm_reco_ast, dlm, dlm_reco_bast);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) dlm->name, ret, lksb.status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) if (ret == DLM_NORMAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) dlm->name, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) /* got the EX lock. check to see if another node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) * just became the reco master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) if (dlm_reco_master_ready(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) mlog(0, "%s: got reco EX lock, but %u will "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) "do the recovery\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) status = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) /* see if recovery was already finished elsewhere */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) mlog(0, "%s: got reco EX lock, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) "node got recovered already\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) mlog(ML_ERROR, "%s: new master is %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) "but no dead node!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) dlm->name, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) /* if this node has actually become the recovery master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) * set the master and send the messages to begin recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) if (!status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) mlog(0, "%s: dead=%u, this=%u, sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) "begin_reco now\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) dlm->reco.dead_node, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) status = dlm_send_begin_reco_message(dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) /* this always succeeds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) BUG_ON(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) /* set the new_master to this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) dlm_set_reco_master(dlm, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) /* recovery lock is a special case. ast will not get fired,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) * so just go ahead and unlock it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) ret = dlmunlock(dlm, &lksb, 0, dlm_reco_unlock_ast, dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) if (ret == DLM_DENIED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) mlog(0, "got DLM_DENIED, trying LKM_CANCEL\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) ret = dlmunlock(dlm, &lksb, LKM_CANCEL, dlm_reco_unlock_ast, dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) if (ret != DLM_NORMAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) /* this would really suck. this could only happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) * if there was a network error during the unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) * because of node death. this means the unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) * is actually "done" and the lock structure is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) * even freed. we can continue, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) * because this specific lock name is special. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) mlog(ML_ERROR, "dlmunlock returned %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) } else if (ret == DLM_NOTQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) dlm->name, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) /* another node is master. wait on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) * reco.new_master != O2NM_INVALID_NODE_NUM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) * for at most one second */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) dlm_reco_master_ready(dlm),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) msecs_to_jiffies(1000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) if (!dlm_reco_master_ready(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) mlog(0, "%s: reco master taking awhile\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) /* another node has informed this one that it is reco master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) mlog(0, "%s: reco master %u is ready to recover %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) status = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) } else if (ret == DLM_RECOVERING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) dlm->name, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) /* dlmlock returned something other than NOTQUEUED or NORMAL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) mlog(ML_ERROR, "%s: got %s from dlmlock($RECOVERY), "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) "lksb.status=%s\n", dlm->name, dlm_errname(ret),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) dlm_errname(lksb.status));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) DLM_RECOVERY_LOCK_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) mlog(ML_ERROR, "recovery lock not found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) struct dlm_begin_reco br;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) clear_bit(dead_node, iter.node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) memset(&br, 0, sizeof(br));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) br.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) br.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) if (nodenum == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) mlog(0, "not sending begin reco to dead node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) "%u\n", dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) if (nodenum == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) mlog(0, "not sending begin reco to self\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) mlog(0, "attempting to send begin reco msg to %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) &br, sizeof(br), nodenum, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) /* negative status is handled ok by caller here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) if (dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) /* node is down. not involved in recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) * so just keep going */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) mlog(ML_NOTICE, "%s: node %u was down when sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) "begin reco msg (%d)\n", dlm->name, nodenum, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) * We are handling both for compatibility reasons.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) if (ret == -EAGAIN || ret == EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) mlog(0, "%s: trying to start recovery of node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) "%u, but node %u is waiting for last recovery "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) "to complete, backoff for a bit\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) dead_node, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) /* this is now a serious problem, possibly ENOMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) * in the network stack. must retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) mlog(ML_ERROR, "begin reco of dlm %s to node %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) "returned %d\n", dlm->name, nodenum, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) DLM_RECOVERY_LOCK_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) mlog(ML_ERROR, "recovery lock not found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) /* sleep for a bit in hopes that we can avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) * another ENOMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) /* ok to return 0, domain has gone away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) mlog(0, "%s: node %u wants to recover node %u (%u:%u) "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) "but this node is in finalize state, waiting on finalize2\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) dlm->name, br->node_idx, br->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) dlm->name, br->node_idx, br->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) dlm_fire_domain_eviction_callbacks(dlm, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) if (test_bit(dlm->reco.new_master, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) mlog(0, "%s: new_master %u died, changing "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) "to %u\n", dlm->name, dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) br->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) mlog(0, "%s: new_master %u NOT DEAD, changing "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) "to %u\n", dlm->name, dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) br->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) /* may not have seen the new master as dead yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) "node %u changing it to %u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) dlm->reco.dead_node, br->node_idx, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) dlm_set_reco_master(dlm, br->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) dlm_set_reco_dead_node(dlm, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) if (!test_bit(br->dead_node, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) mlog(0, "recovery master %u sees %u as dead, but this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) "node has not yet. marking %u as dead\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) br->node_idx, br->dead_node, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) if (!test_bit(br->dead_node, dlm->domain_map) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) !test_bit(br->dead_node, dlm->live_nodes_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) mlog(0, "%u not in domain/live_nodes map "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) "so setting it in reco map manually\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) /* force the recovery cleanup in __dlm_hb_node_down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) * both of these will be cleared in a moment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) set_bit(br->dead_node, dlm->domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) set_bit(br->dead_node, dlm->live_nodes_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) __dlm_hb_node_down(dlm, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) dlm->name, br->node_idx, br->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) #define DLM_FINALIZE_STAGE2 0x01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) struct dlm_finalize_reco fr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) int stage = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) mlog(0, "finishing recovery for node %s:%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) "stage %d\n", dlm->name, dlm->reco.dead_node, stage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) stage2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) memset(&fr, 0, sizeof(fr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) fr.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) fr.dead_node = dlm->reco.dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) if (stage == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) fr.flags |= DLM_FINALIZE_STAGE2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) if (nodenum == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) &fr, sizeof(fr), nodenum, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) mlog(ML_ERROR, "Error %d when sending message %u (key "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) dlm->key, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) if (dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) /* this has no effect on this recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) * session, so set the status to zero to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) * finish out the last recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) mlog(ML_ERROR, "node %u went down after this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) "node finished recovery.\n", nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) if (stage == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) /* reset the node_iter back to the top and send finalize2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) iter.curnode = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) stage = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) goto stage2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) int stage = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) /* ok to return 0, domain has gone away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) if (fr->flags & DLM_FINALIZE_STAGE2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) stage = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) mlog(0, "%s: node %u finalizing recovery stage%d of "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) if (dlm->reco.new_master != fr->node_idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) mlog(ML_ERROR, "node %u sent recovery finalize msg, but node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) "%u is supposed to be the new master, dead=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) fr->node_idx, dlm->reco.new_master, fr->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) if (dlm->reco.dead_node != fr->dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) mlog(ML_ERROR, "node %u sent recovery finalize msg for dead "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) "node %u, but node %u is supposed to be dead\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) fr->node_idx, fr->dead_node, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) switch (stage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) mlog(ML_ERROR, "%s: received finalize1 from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) "new master %u for dead node %u, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) "this node has already received it!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) dlm->name, fr->node_idx, fr->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) dlm_print_reco_node_status(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) mlog(ML_ERROR, "%s: received finalize2 from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) "new master %u for dead node %u, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) "this node did not have finalize1!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) dlm->name, fr->node_idx, fr->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) dlm_print_reco_node_status(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) __dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) }