Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /* -*- mode: c; c-basic-offset: 8; -*-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * vim: noexpandtab sw=8 ts=8 sts=0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * dlmrecovery.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  * recovery stuff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  * Copyright (C) 2004 Oracle.  All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <linux/random.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <linux/socket.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/inet.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <linux/timer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include "../cluster/heartbeat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include "../cluster/nodemanager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) #include "../cluster/tcp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include "dlmapi.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #include "dlmcommon.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) #include "dlmdomain.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_RECOVERY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include "../cluster/masklog.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) static int dlm_recovery_thread(void *data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) static int dlm_do_recovery(struct dlm_ctxt *dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) static int dlm_request_all_locks(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) 				 u8 request_from, u8 dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) 					const char *lockname, int namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) 					int total_locks, u64 cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) 					u8 flags, u8 master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) 				    struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) 				    u8 send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) 				    struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) 				    int total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) 				     struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) 				     struct dlm_migratable_lockres *mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) 				 u8 dead_node, u8 send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 					struct list_head *list, u8 dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) 					      u8 dead_node, u8 new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) static void dlm_reco_ast(void *astdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) static void dlm_reco_bast(void *astdata, int blocked_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) static void dlm_request_all_locks_worker(struct dlm_work_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 					 void *data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 				      struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 				      u8 *real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) static u64 dlm_get_next_mig_cookie(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) static DEFINE_SPINLOCK(dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) static DEFINE_SPINLOCK(dlm_mig_cookie_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) static u64 dlm_mig_cookie = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) static u64 dlm_get_next_mig_cookie(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 	u64 c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	spin_lock(&dlm_mig_cookie_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	c = dlm_mig_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 	if (dlm_mig_cookie == (~0ULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 		dlm_mig_cookie = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 		dlm_mig_cookie++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 	spin_unlock(&dlm_mig_cookie_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 	return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) static inline void dlm_set_reco_dead_node(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 					  u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	if (dlm->reco.dead_node != dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 		mlog(0, "%s: changing dead_node from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 		     dlm->name, dlm->reco.dead_node, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	dlm->reco.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) static inline void dlm_set_reco_master(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 				       u8 master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	mlog(0, "%s: changing new_master from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 	     dlm->name, dlm->reco.new_master, master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	dlm->reco.new_master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	clear_bit(dlm->reco.dead_node, dlm->recovery_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	__dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) /* Worker function used during recovery. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) void dlm_dispatch_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	struct dlm_ctxt *dlm =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 		container_of(work, struct dlm_ctxt, dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	struct dlm_work_item *item, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	dlm_workfunc_t *workfunc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	int tot=0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 	list_splice_init(&dlm->work_list, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	list_for_each_entry(item, &tmp_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 		tot++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	list_for_each_entry_safe(item, next, &tmp_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 		workfunc = item->func;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 		list_del_init(&item->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 		/* already have ref on dlm to avoid having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 		 * it disappear.  just double-check. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 		BUG_ON(item->dlm != dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 		/* this is allowed to sleep and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 		 * call network stuff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 		workfunc(item, item->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 		kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173)  * RECOVERY THREAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	/* wake the recovery thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 	 * this will wake the reco thread in one of three places
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	 * 1) sleeping with no recovery happening
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 	 * 2) sleeping with recovery mastered elsewhere
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	 * 3) recovery mastered here, waiting on reco data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	wake_up(&dlm->dlm_reco_thread_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) /* Launch the recovery thread */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) int dlm_launch_recovery_thread(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	mlog(0, "starting dlm recovery thread...\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 	dlm->dlm_reco_thread_task = kthread_run(dlm_recovery_thread, dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 			"dlm_reco-%s", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 	if (IS_ERR(dlm->dlm_reco_thread_task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 		mlog_errno(PTR_ERR(dlm->dlm_reco_thread_task));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 		dlm->dlm_reco_thread_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	if (dlm->dlm_reco_thread_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 		mlog(0, "waiting for dlm recovery thread to exit\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 		kthread_stop(dlm->dlm_reco_thread_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 		dlm->dlm_reco_thread_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 
/*
 * this is lame, but here's how recovery works...
 * 1) all recovery threads cluster wide will work on recovering
 *    ONE node at a time
 * 2) negotiate who will take over all the locks for the dead node.
 *    that's right... ALL the locks.
 * 3) once a new master is chosen, everyone scans all locks
 *    and moves aside those mastered by the dead guy
 * 4) each of these locks should be locked until recovery is done
 * 5) the new master collects up all of secondary lock queue info
 *    one lock at a time, forcing each node to communicate back
 *    before continuing
 * 6) each secondary lock queue responds with the full known lock info
 * 7) once the new master has run all its locks, it sends an ALLDONE!
 *    message to everyone
 * 8) upon receiving this message, the secondary queue node unlocks
 *    and responds to the ALLDONE
 * 9) once the new master gets responses from everyone, he unlocks
 *    everything and recovery for this dead node is done
 *10) go back to 2) while there are still dead nodes
 *
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	struct dlm_reco_node_data *ndata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	     dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	     dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 		char *st = "unknown";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 		switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 			case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 				st = "init";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 			case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 				st = "requesting";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 			case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 				st = "dead";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 			case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 				st = "receiving";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 			case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 				st = "requested";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 			case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 				st = "done";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 			case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 				st = "finalize-sent";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 			default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 				st = "bad";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 		mlog(ML_NOTICE, "%s: reco state, node %u, state=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 		     dlm->name, ndata->node_num, st);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	list_for_each_entry(res, &dlm->reco.resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 		mlog(ML_NOTICE, "%s: lockres %.*s on recovering list\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 		     dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) #define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) static int dlm_recovery_thread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	unsigned long timeout = msecs_to_jiffies(DLM_RECO_THREAD_TIMEOUT_MS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	mlog(0, "dlm thread running for %s...\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 	while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 		if (dlm_domain_fully_joined(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 			status = dlm_do_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 			if (status == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 				/* do not sleep, recheck immediately. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 			if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 				mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 		wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 						 kthread_should_stop(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 						 timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	mlog(0, "quitting DLM recovery thread\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) /* returns true when the recovery master has contacted us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) static int dlm_reco_master_ready(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 	int ready;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 	ready = (dlm->reco.new_master != O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	return ready;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) /* returns true if node is no longer in the domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325)  * could be dead or just not joined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 	int dead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	dead = !test_bit(node, dlm->domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 	return dead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) /* returns true if node is no longer in the domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336)  * could be dead or just not joined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	int recovered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	recovered = !test_bit(node, dlm->recovery_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	return recovered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 	if (dlm_is_node_dead(dlm, node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	       "domain %s\n", node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 	if (timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 		wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 				   dlm_is_node_dead(dlm, node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 				   msecs_to_jiffies(timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		wait_event(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 			   dlm_is_node_dead(dlm, node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 	if (dlm_is_node_recovered(dlm, node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 	       "domain %s\n", node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	if (timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 		wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 				   dlm_is_node_recovered(dlm, node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 				   msecs_to_jiffies(timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 		wait_event(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 			   dlm_is_node_recovered(dlm, node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) /* callers of the top-level api calls (dlmlock/dlmunlock) should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382)  * block on the dlm->reco.event when recovery is in progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383)  * the dlm recovery thread will set this state when it begins
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384)  * recovering a dead node (as the new master or not) and clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385)  * the state and wake as soon as all affected lock resources have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386)  * been marked with the RECOVERY flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) static int dlm_in_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	int in_recovery;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	in_recovery = !!(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	return in_recovery;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	if (dlm_in_recovery(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 		mlog(0, "%s: reco thread %d in recovery: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 		     "state=%d, master=%u, dead=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 		     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 		     dlm->reco.state, dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 		     dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 	wait_event(dlm->reco.event, !dlm_in_recovery(dlm));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) static void dlm_begin_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 	BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 	       dlm->name, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 	dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) static void dlm_end_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 	dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	wake_up(&dlm->reco.event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) static void dlm_print_recovery_master(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	       "dead node %u in domain %s\n", dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 	       (dlm->node_num == dlm->reco.new_master ? "me" : "he"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	       dlm->reco.dead_node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) static int dlm_do_recovery(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	if (dlm->migrate_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 		mlog(0, "%s: no need do recovery after migrating all "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 		     "lock resources\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 	/* check to see if the new master has died */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 	    test_bit(dlm->reco.new_master, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 		mlog(0, "new master %u died while recovering %u!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 		     dlm->reco.new_master, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 		/* unset the new_master, leave dead_node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 		dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 	/* select a target to recover */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 	if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 		int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 		bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 		if (bit >= O2NM_MAX_NODES || bit < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 			dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 			dlm_set_reco_dead_node(dlm, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 	} else if (!test_bit(dlm->reco.dead_node, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 		/* BUG? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 		mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 		     dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 		dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 		// mlog(0, "nothing to recover!  sleeping now!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 		/* return to main thread loop and sleep. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	     dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	/* take write barrier */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	/* (stops the list reshuffling thread, proxy ast handling) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	dlm_begin_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	if (dlm->reco.new_master == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		goto master_here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 		/* choose a new master, returns 0 if this node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 		 * is the master, -EEXIST if it's another node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 		 * this does not return until a new master is chosen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 		 * or recovery completes entirely. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 		ret = dlm_pick_recovery_master(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 		if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 			/* already notified everyone.  go. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 			goto master_here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 		mlog(0, "another node will master this recovery session.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	dlm_print_recovery_master(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	/* it is safe to start everything back up here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 	 * because all of the dead node's lock resources
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	 * have been marked as in-recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 	dlm_end_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 	/* sleep out in main dlm_recovery_thread loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) master_here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	dlm_print_recovery_master(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 	if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 		/* we should never hit this anymore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 		mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 		     "retrying.\n", dlm->name, status, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 		/* yield a bit to allow any final network messages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 		 * to get handled on remaining nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 		msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 		/* success!  see if any other nodes need recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 		mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 		     dlm->name, dlm->reco.dead_node, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 		spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 		__dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 	dlm_end_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	/* continue and look for another dead node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	int status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 	struct dlm_reco_node_data *ndata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 	int all_nodes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	int destroy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	int pass = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 		/* we have become recovery master.  there is no escaping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 		 * this, so just keep trying until we get it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 		status = dlm_init_recovery_area(dlm, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 		if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 			mlog(ML_ERROR, "%s: failed to alloc recovery area, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 			     "retrying\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 			msleep(1000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 	} while (status != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 	/* safe to access the node data list without a lock, since this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 	 * process is the only one to change the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 		BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 		ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 		mlog(0, "%s: Requesting lock info from node %u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		     ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 		if (ndata->node_num == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 			ndata->state = DLM_RECO_NODE_DATA_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 			status = dlm_request_all_locks(dlm, ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 						       dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 			if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 				mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 				if (dlm_is_host_down(status)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 					/* node died, ignore it for recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 					status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 					ndata->state = DLM_RECO_NODE_DATA_DEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 					/* wait for the domain map to catch up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 					 * with the network state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 					wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 							   dlm_is_node_dead(dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 								ndata->node_num),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 							   msecs_to_jiffies(1000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 					mlog(0, "waited 1 sec for %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 					     "dead? %s\n", ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 					     dlm_is_node_dead(dlm, ndata->node_num) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 					     "yes" : "no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 				} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 					/* -ENOMEM on the other node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 					mlog(0, "%s: node %u returned "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 					     "%d during recovery, retrying "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 					     "after a short wait\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 					     dlm->name, ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 					     status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 					msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 		} while (status != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 		spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 		switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 			case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 			case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 			case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 			case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 				mlog(0, "node %u died after requesting "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 				     "recovery info for node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 				     ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 				/* fine.  don't need this node's info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 				 * continue without it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 			case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 				ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 				mlog(0, "now receiving recovery data from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 				     "node %u for dead node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 				     ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 			case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 				mlog(0, "already receiving recovery data from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 				     "node %u for dead node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 				     ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 			case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 				mlog(0, "already DONE receiving recovery data "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 				     "from node %u for dead node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 				     ndata->node_num, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 		spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	mlog(0, "%s: Done requesting all lock info\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	/* nodes should be sending reco data now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	 * just need to wait */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 		/* check all the nodes now to see if we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 		 * done, or if anyone died */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 		all_nodes_done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 		spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 		list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 			mlog(0, "checking recovery state of node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 			     ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 			switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 				case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 				case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 					mlog(ML_ERROR, "bad ndata state for "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 					     "node %u: state=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 					     ndata->node_num, ndata->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 					BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 				case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 					mlog(0, "node %u died after "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 					     "requesting recovery info for "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 					     "node %u\n", ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 					     dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 				case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 				case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 					mlog(0, "%s: node %u still in state %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 					     dlm->name, ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 					     ndata->state==DLM_RECO_NODE_DATA_RECEIVING ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 					     "receiving" : "requested");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 					all_nodes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 				case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 					mlog(0, "%s: node %u state is done\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 					     dlm->name, ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 				case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 					mlog(0, "%s: node %u state is finalize\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 					     dlm->name, ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 		mlog(0, "pass #%d, all_nodes_done?: %s\n", ++pass,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 		     all_nodes_done?"yes":"no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 		if (all_nodes_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 			int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 			/* Set this flag on recovery master to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 			 * a new recovery for another dead node start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 			 * before the recovery is not done. That may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 			 * cause recovery hung.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 			spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 			dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 			spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 			/* all nodes are now in DLM_RECO_NODE_DATA_DONE state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	 		 * just send a finalize message to everyone and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	 		 * clean up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 			mlog(0, "all nodes are done! send finalize\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 			ret = dlm_send_finalize_reco_message(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 			if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 				mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 			spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 			dlm_finish_local_lockres_recovery(dlm, dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 							  dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 			spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 			mlog(0, "should be done with recovery!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 			mlog(0, "finishing recovery of %s at %lu, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 			     "dead=%u, this=%u, new=%u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 			     jiffies, dlm->reco.dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 			     dlm->node_num, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 			destroy = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 			status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 			/* rescan everything marked dirty along the way */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 			dlm_kick_thread(dlm, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 		/* wait to be signalled, with periodic timeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 		 * to check for node death */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 		wait_event_interruptible_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 					 kthread_should_stop(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 					 msecs_to_jiffies(DLM_RECO_THREAD_TIMEOUT_MS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	if (destroy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 		dlm_destroy_recovery_area(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	int num=0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	struct dlm_reco_node_data *ndata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	memcpy(dlm->reco.node_map, dlm->domain_map, sizeof(dlm->domain_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	/* nodes can only be removed (by dying) after dropping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	 * this lock, and death will be trapped later, so this should do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 		num = find_next_bit (dlm->reco.node_map, O2NM_MAX_NODES, num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 		if (num >= O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 		BUG_ON(num == dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 		ndata = kzalloc(sizeof(*ndata), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 		if (!ndata) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 			dlm_destroy_recovery_area(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 		ndata->node_num = num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 		ndata->state = DLM_RECO_NODE_DATA_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 		list_add_tail(&ndata->list, &dlm->reco.node_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 		num++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	struct dlm_reco_node_data *ndata, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	LIST_HEAD(tmplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	list_splice_init(&dlm->reco.node_data, &tmplist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	list_for_each_entry_safe(ndata, next, &tmplist, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 		list_del_init(&ndata->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 		kfree(ndata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 				 u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	struct dlm_lock_request lr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	mlog(0, "\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	mlog(0, "dlm_request_all_locks: dead node is %u, sending request "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 		  "to %u\n", dead_node, request_from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	memset(&lr, 0, sizeof(lr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 	lr.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	lr.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	// send message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 				 &lr, sizeof(lr), request_from, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	/* negative status is handled by caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 		     "to recover dead node %u\n", dlm->name, ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 		     request_from, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	// return from here, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	// sleep until all received or error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 				  void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	char *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	struct dlm_work_item *item = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	if (lr->dead_node != dlm->reco.dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 		mlog(ML_ERROR, "%s: node %u sent dead_node=%u, but local "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		     "dead_node is %u\n", dlm->name, lr->node_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		     lr->dead_node, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		dlm_print_reco_node_status(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 		/* this is a hack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	BUG_ON(lr->dead_node != dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	item = kzalloc(sizeof(*item), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	if (!item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	/* this will get freed by dlm_request_all_locks_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	buf = (char *) __get_free_page(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	if (!buf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	/* queue up work for dlm_request_all_locks_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	dlm_grab(dlm);  /* get an extra ref for the work item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	dlm_init_work_item(dlm, item, dlm_request_all_locks_worker, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	item->u.ral.reco_master = lr->node_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	item->u.ral.dead_node = lr->dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	list_add_tail(&item->list, &dlm->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	struct dlm_migratable_lockres *mres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	struct dlm_ctxt *dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	LIST_HEAD(resources);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	u8 dead_node, reco_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	int skip_all_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	dlm = item->dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	dead_node = item->u.ral.dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	reco_master = item->u.ral.reco_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 	mres = (struct dlm_migratable_lockres *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	mlog(0, "%s: recovery worker started, dead=%u, master=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	     dlm->name, dead_node, reco_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	if (dead_node != dlm->reco.dead_node ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	    reco_master != dlm->reco.new_master) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		/* worker could have been created before the recovery master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		 * died.  if so, do not continue, but do not error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 			mlog(ML_NOTICE, "%s: will not send recovery state, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 			     "recovery master %u died, thread=(dead=%u,mas=%u)"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 			     " current=(dead=%u,mas=%u)\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 			     reco_master, dead_node, reco_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 			     dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 			mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 			     "master=%u), request(dead=%u, master=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 			     dlm->name, dlm->reco.dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 			     dlm->reco.new_master, dead_node, reco_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	/* lock resources should have already been moved to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910)  	 * dlm->reco.resources list.  now move items from that list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911)  	 * to a temp list if the dead owner matches.  note that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	 * whole cluster recovers only one node at a time, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	 * can safely move UNKNOWN lock resources for each recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	 * session. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	/* now we can begin blasting lockreses without the dlm lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	/* any errors returned will be due to the new_master dying,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	 * the dlm_reco_thread should detect this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	list_for_each_entry(res, &resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 		ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 				   	DLM_MRES_RECOVERY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 			mlog(ML_ERROR, "%s: node %u went down while sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 			     "recovery state for dead node %u, ret=%d\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 			     reco_master, dead_node, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 			skip_all_done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	/* move the resources back to the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	list_splice_init(&resources, &dlm->reco.resources);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	if (!skip_all_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 		ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 			mlog(ML_ERROR, "%s: node %u went down while sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 			     "recovery all-done for dead node %u, ret=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 			     dlm->name, reco_master, dead_node, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	free_page((unsigned long)data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	int ret, tmpret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	struct dlm_reco_data_done done_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	memset(&done_msg, 0, sizeof(done_msg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	done_msg.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	done_msg.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	mlog(0, "sending DATA DONE message to %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	     "my node=%u, dead node=%u\n", send_to, done_msg.node_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	     done_msg.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 				 sizeof(done_msg), send_to, &tmpret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		     "to recover dead node %u\n", dlm->name, ret, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		     dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 		if (!dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 			BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		ret = tmpret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 			       void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	struct dlm_reco_node_data *ndata = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	mlog(0, "got DATA DONE: dead_node=%u, reco.dead_node=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	     "node_idx=%u, this node=%u\n", done->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	     dlm->reco.dead_node, done->node_idx, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	mlog_bug_on_msg((done->dead_node != dlm->reco.dead_node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 			"Got DATA DONE: dead_node=%u, reco.dead_node=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 			"node_idx=%u, this node=%u\n", done->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 			dlm->reco.dead_node, done->node_idx, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	spin_lock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 		if (ndata->node_num != done->node_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 		switch (ndata->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 			/* should have moved beyond INIT but not to FINALIZE yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 			case DLM_RECO_NODE_DATA_INIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 			case DLM_RECO_NODE_DATA_DEAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 			case DLM_RECO_NODE_DATA_FINALIZE_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 				mlog(ML_ERROR, "bad ndata state for node %u:"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 				     " state=%d\n", ndata->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 				     ndata->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 			/* these states are possible at this point, anywhere along
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 			 * the line of recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 			case DLM_RECO_NODE_DATA_DONE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 			case DLM_RECO_NODE_DATA_RECEIVING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 			case DLM_RECO_NODE_DATA_REQUESTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 			case DLM_RECO_NODE_DATA_REQUESTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 				mlog(0, "node %u is DONE sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 					  "recovery data!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 					  ndata->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 				ndata->state = DLM_RECO_NODE_DATA_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 				ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	spin_unlock(&dlm_reco_state_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	/* wake the recovery thread, some node is done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 		mlog(ML_ERROR, "failed to find recovery node data for node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		     "%u\n", done->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	mlog(0, "leaving reco data done handler, ret=%d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 					struct list_head *list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 				       	u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	struct dlm_lock_resource *res, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		/* always prune any $RECOVERY entries for dead nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		 * otherwise hangs can occur during later recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		if (dlm_is_recovery_lock(res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 					 res->lockname.len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 			spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 			list_for_each_entry(lock, &res->granted, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 				if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 					mlog(0, "AHA! there was "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 					     "a $RECOVERY lock for dead "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 					     "node %u (%s)!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 					     dead_node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 					list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 					dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 					/* Can't schedule DLM_UNLOCK_FREE_LOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 					 * - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 					dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 		if (res->owner == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 			mlog(0, "found lockres owned by dead node while "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 				  "doing recovery for node %u. sending it.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 				  dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 			list_move_tail(&res->recovering, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		} else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 			mlog(0, "found UNKNOWN owner while doing recovery "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 				  "for node %u. sending it.\n", dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 			list_move_tail(&res->recovering, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	int total_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	struct list_head *iter, *queue = &res->granted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	for (i=0; i<3; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 		list_for_each(iter, queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 			total_locks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		queue++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	return total_locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 				      struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 				      u8 send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 				      struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 				      int total_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	u64 mig_cookie = be64_to_cpu(mres->mig_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	int mres_total_locks = be32_to_cpu(mres->total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	int ret = 0, status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	u8 orig_flags = mres->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	   orig_master = mres->master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	BUG_ON(mres->num_locks > DLM_MAX_MIGRATABLE_LOCKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	if (!mres->num_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	/* add an all-done flag if we reached the last lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	orig_flags = mres->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	BUG_ON(total_locks > mres_total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	if (total_locks == mres_total_locks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 		mres->flags |= DLM_MRES_ALL_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	     dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	     orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	     send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	/* send it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 				 struct_size(mres, ml, mres->num_locks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 				 send_to, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 		/* XXX: negative status is not handled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 		 * this will end up killing this node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 		mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 		     "node %u (%s)\n", dlm->name, mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 		     mres->lockname, ret, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 		     (orig_flags & DLM_MRES_MIGRATION ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 		      "migration" : "recovery"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 		/* might get an -ENOMEM back here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 			mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 			if (ret == -EFAULT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 				mlog(ML_ERROR, "node %u told me to kill "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 				     "myself!\n", send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	/* zero and reinit the message buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	dlm_init_migratable_lockres(mres, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 				    res->lockname.len, mres_total_locks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 				    mig_cookie, orig_flags, orig_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 					const char *lockname, int namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 					int total_locks, u64 cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 					u8 flags, u8 master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	/* mres here is one full page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	clear_page(mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	mres->lockname_len = namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	memcpy(mres->lockname, lockname, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	mres->num_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	mres->total_locks = cpu_to_be32(total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	mres->mig_cookie = cpu_to_be64(cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	mres->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	mres->master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 					  struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 					  int queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	if (!lock->lksb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	       return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	/* Ignore lvb in all locks in the blocked list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	if (queue == DLM_BLOCKED_LIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	/* Only consider lvbs in locks with granted EX or PR lock levels */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	if (dlm_lvb_is_empty(mres->lvb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 		memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	/* Ensure the lvb copied for migration matches in other valid locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	     "node=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	     lock->lockres->lockname.len, lock->lockres->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	     lock->ml.node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	dlm_print_one_lock_resource(lock->lockres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) /* returns 1 if this lock fills the network structure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)  * 0 otherwise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) static int dlm_add_lock_to_array(struct dlm_lock *lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 				 struct dlm_migratable_lockres *mres, int queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	struct dlm_migratable_lock *ml;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	int lock_num = mres->num_locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	ml = &(mres->ml[lock_num]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	ml->cookie = lock->ml.cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	ml->type = lock->ml.type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	ml->convert_type = lock->ml.convert_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	ml->highest_blocked = lock->ml.highest_blocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	ml->list = queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	if (lock->lksb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		ml->flags = lock->lksb->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		dlm_prepare_lvb_for_migration(lock, mres, queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	ml->node = lock->ml.node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	mres->num_locks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	/* we reached the max, send this network message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	if (mres->num_locks == DLM_MAX_MIGRATABLE_LOCKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) static void dlm_add_dummy_lock(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 			       struct dlm_migratable_lockres *mres)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	struct dlm_lock dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	memset(&dummy, 0, sizeof(dummy));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	dummy.ml.cookie = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	dummy.ml.type = LKM_IVMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	dummy.ml.convert_type = LKM_IVMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	dummy.ml.highest_blocked = LKM_IVMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	dummy.lksb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	dummy.ml.node = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 				    struct dlm_migratable_lock *ml,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 				    u8 *nodenum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	if (unlikely(ml->cookie == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	    ml->type == LKM_IVMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	    ml->convert_type == LKM_IVMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	    ml->highest_blocked == LKM_IVMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	    ml->list == DLM_BLOCKED_LIST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		*nodenum = ml->node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 			 struct dlm_migratable_lockres *mres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 			 u8 send_to, u8 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	int total_locks, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	u64 mig_cookie = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	BUG_ON(!(flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	mlog(0, "sending to %u\n", send_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	total_locks = dlm_num_locks_in_lockres(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	if (total_locks > DLM_MAX_MIGRATABLE_LOCKS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		/* rare, but possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		mlog(0, "argh.  lockres has %d locks.  this will "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 			  "require more than one network packet to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 			  "migrate\n", total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 		mig_cookie = dlm_get_next_mig_cookie();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	dlm_init_migratable_lockres(mres, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 				    res->lockname.len, total_locks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 				    mig_cookie, flags, res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	total_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		queue = dlm_list_idx_to_ptr(res, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 		list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 			/* add another lock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 			total_locks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 			if (!dlm_add_lock_to_array(lock, mres, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 			/* this filled the lock message,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 			 * we must send it immediately. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 			ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 						       res, total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 			if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 				goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	if (total_locks == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 		/* send a dummy lock to indicate a mastery reference only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 		mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 		     dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 		     send_to, flags & DLM_MRES_RECOVERY ? "recovery" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 		     "migration");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		dlm_add_dummy_lock(dlm, mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	/* flush any remaining locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 		goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	     dlm->name, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	if (!dlm_is_host_down(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	mlog(0, "%s: node %u went down while sending %s "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	     "lockres %.*s\n", dlm->name, send_to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	     flags & DLM_MRES_RECOVERY ?  "recovery" : "migration",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	     res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)  * this message will contain no more than one page worth of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)  * recovery data, and it will work on only one lockres.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)  * there may be many locks in this page, and we may need to wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)  * for additional packets to complete all the locks (rare, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)  * possible).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)  * NOTE: the allocation error cases here are scary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)  * we really cannot afford to fail an alloc in recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)  * do we spin?  returning an error only delays the problem really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 			    void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	struct dlm_migratable_lockres *mres =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 		(struct dlm_migratable_lockres *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	u8 real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	u8 extra_refs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	char *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	struct dlm_work_item *item = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	if (!dlm_joined(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 		mlog(ML_ERROR, "Domain %s not joined! "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 			  "lockres %.*s, master %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 			  dlm->name, mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 			  mres->lockname, mres->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	real_master = mres->master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		/* cannot migrate a lockres with no master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	mlog(0, "%s message received from node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		  (mres->flags & DLM_MRES_RECOVERY) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 		  "recovery" : "migration", mres->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	if (mres->flags & DLM_MRES_ALL_DONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		mlog(0, "all done flag.  all lockres data received!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	buf = kmalloc(be16_to_cpu(msg->data_len), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	item = kzalloc(sizeof(*item), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 	if (!buf || !item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	/* lookup the lock to see if we have a secondary queue for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	 * already...  just add the locks in and this will have its owner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	 * and RECOVERY flag changed when it completes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	hash = dlm_lockid_hash(mres->lockname, mres->lockname_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	res = __dlm_lookup_lockres_full(dlm, mres->lockname, mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 			hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	 	/* this will get a ref on res */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 		/* mark it as recovering/migrating and hash it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 		spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 		if (res->state & DLM_LOCK_RES_DROPPING_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 			mlog(0, "%s: node is attempting to migrate "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 				"lockres %.*s, but marked as dropping "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 				" ref!\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 				mres->lockname_len, mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 			ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 			spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 			dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 			goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		if (mres->flags & DLM_MRES_RECOVERY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 			res->state |= DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 			if (res->state & DLM_LOCK_RES_MIGRATING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 				/* this is at least the second
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 				 * lockres message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 				mlog(0, "lock %.*s is already migrating\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 					  mres->lockname_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 					  mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 			} else if (res->state & DLM_LOCK_RES_RECOVERING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 				/* caller should BUG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 				mlog(ML_ERROR, "node is attempting to migrate "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 				     "lock %.*s, but marked as recovering!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 				     mres->lockname_len, mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 				ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 				spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 				spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 				dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 				goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 			res->state |= DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 		spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 		/* need to allocate, just like if it was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 		 * mastered here normally  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 		res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 		if (!res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 			goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 		/* to match the ref that we would have gotten if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 		 * dlm_lookup_lockres had succeeded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 		dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 		/* mark it as recovering/migrating and hash it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 		if (mres->flags & DLM_MRES_RECOVERY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 			res->state |= DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 			res->state |= DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 		spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		__dlm_insert_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 		/* Add an extra ref for this lock-less lockres lest the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 		 * dlm_thread purges it before we get the chance to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 		 * locks to it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 		dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 		/* There are three refs that need to be put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		 * 1. Taken above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 		 * 2. kref_init in dlm_new_lockres()->dlm_init_lockres().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 		 * 3. dlm_lookup_lockres()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 		 * The first one is handled at the end of this function. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 		 * other two are handled in the worker thread after locks have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 		 * been attached. Yes, we don't wait for purge time to match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 		 * kref_init. The lockres will still have atleast one ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 		 * added because it is in the hash __dlm_insert_lockres() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 		extra_refs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 		/* now that the new lockres is inserted,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 		 * make it usable by other processes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 		spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 		res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 		spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 		wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	/* at this point we have allocated everything we need,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	 * and we have a hashed lockres with an extra ref and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	 * the proper res->state flags. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	/* drop this either when master requery finds a different master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	 * or when a lock is added by the recovery worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	dlm_lockres_grab_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 		/* migration cannot have an unknown master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 		mlog(0, "recovery has passed me a lockres with an "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 			  "unknown owner.. will need to requery: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 			  "%.*s\n", mres->lockname_len, mres->lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 		/* take a reference now to pin the lockres, drop it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 		 * when locks are added in the worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 		dlm_change_lockres_owner(dlm, res, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	/* queue up work for dlm_mig_lockres_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	dlm_grab(dlm);  /* get an extra ref for the work item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 	memcpy(buf, msg->buf, be16_to_cpu(msg->data_len));  /* copy the whole message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	item->u.ml.lockres = res; /* already have a ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	item->u.ml.real_master = real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	item->u.ml.extra_ref = extra_refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	list_add_tail(&item->list, &dlm->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	/* One extra ref taken needs to be put here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	if (extra_refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 		dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 		kfree(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 		mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	struct dlm_ctxt *dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	struct dlm_migratable_lockres *mres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 	struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	u8 real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	u8 extra_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	dlm = item->dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 	mres = (struct dlm_migratable_lockres *)data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	res = item->u.ml.lockres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	real_master = item->u.ml.real_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	extra_ref = item->u.ml.extra_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		/* this case is super-rare. only occurs if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		 * node death happens during migration. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		ret = dlm_lockres_master_requery(dlm, res, &real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 			mlog(0, "dlm_lockres_master_requery ret=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 				  ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 			goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 			mlog(0, "lockres %.*s not claimed.  "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 				   "this node will take it.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 				   res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 			spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 			dlm_lockres_drop_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 			mlog(0, "master needs to respond to sender "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 				  "that node %u still owns %.*s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 				  real_master, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 				  res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 			/* cannot touch this lockres */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 			goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	ret = dlm_process_recovery_data(dlm, res, mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 		mlog(0, "dlm_process_recovery_data returned  %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		mlog(0, "dlm_process_recovery_data succeeded\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 	if ((mres->flags & (DLM_MRES_MIGRATION|DLM_MRES_ALL_DONE)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	                   (DLM_MRES_MIGRATION|DLM_MRES_ALL_DONE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 		ret = dlm_finish_migration(dlm, res, mres->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 			mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 	/* See comment in dlm_mig_lockres_handler() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 		if (extra_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 			dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 		dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 	kfree(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 				      struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 				      u8 *real_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	*real_master = DLM_LOCK_RES_OWNER_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	/* we only reach here if one of the two nodes in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	 * migration died while the migration was in progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	 * at this point we need to requery the master.  we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 	 * know that the new_master got as far as creating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 	 * an mle on at least one node, but we do not know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 	 * if any nodes had actually cleared the mle and set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	 * the master to the new_master.  the old master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	 * is supposed to set the owner to UNKNOWN in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 	 * event of a new_master death, so the only possible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 	 * responses that we can get from nodes here are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 	 * that the master is new_master, or that the master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 	 * is UNKNOWN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 	 * if all nodes come back with UNKNOWN then we know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	 * the lock needs remastering here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 	 * if any node comes back with a valid master, check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	 * to see if that master is the one that we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	 * recovering.  if so, then the new_master died and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	 * we need to remaster this lock.  if not, then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	 * new_master survived and that node will respond to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	 * other nodes about the owner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 	 * if there is an owner, this node needs to dump this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 	 * lockres and alert the sender that this lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	 * was rejected. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 		/* do not send to self */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 		if (nodenum == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 		ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 			mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 			if (!dlm_is_host_down(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 			/* host is down, so answer for that node would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 		if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 			mlog(0, "lock master is %u\n", *real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 			  u8 nodenum, u8 *real_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 	struct dlm_master_requery req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 	int status = DLM_LOCK_RES_OWNER_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 	memset(&req, 0, sizeof(req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 	req.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 	req.namelen = res->lockname.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	memcpy(req.name, res->lockname.name, res->lockname.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) resend:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 				 &req, sizeof(req), nodenum, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 		mlog(ML_ERROR, "Error %d when sending message %u (key "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 		     "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 		     dlm->key, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	else if (status == -ENOMEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 		msleep(50);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 		goto resend;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 		BUG_ON(status < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 		BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 		*real_master = (u8) (status & 0xff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 		mlog(0, "node %u responded to master requery with %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 			  nodenum, *real_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /* this function cannot error, so unless the sending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)  * or receiving of the message failed, the owner can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705)  * be trusted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 			       void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 	struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	int master = DLM_LOCK_RES_OWNER_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	u32 flags = DLM_ASSERT_MASTER_REQUERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	int dispatched = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	if (!dlm_grab(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 		/* since the domain has gone away on this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 		 * node, the proper response is UNKNOWN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 		return master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	hash = dlm_lockid_hash(req->name, req->namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 		spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 		master = res->owner;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 		if (master == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 			int ret = dlm_dispatch_assert_master(dlm, res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 							     0, 0, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 			if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 				mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 				spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 				dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 				spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 				dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 				/* sender will take care of this and retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 				return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 				dispatched = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 				__dlm_lockres_grab_inflight_worker(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 				spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 			/* put.. incase we are not the master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 			dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	if (!dispatched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	return master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 
/*
 * dlm_list_num_to_pointer() - map a numeric queue index to a lockres queue.
 * @res:      lock resource whose queue is wanted
 * @list_num: queue index; anything outside 0..2 trips a BUG_ON
 *
 * Returns the list head of the selected queue of @res.
 *
 * The lookup goes through dlm_list_idx_to_ptr(), the helper the rest of
 * this file already uses for the same index space, instead of stepping a
 * pointer from &res->granted.  Stepping a pointer across separate struct
 * members is out-of-bounds pointer arithmetic in ISO C and only works while
 * the three queue heads stay adjacent and in order inside
 * struct dlm_lock_resource.
 */
static inline struct list_head *
dlm_list_num_to_pointer(struct dlm_lock_resource *res, int list_num)
{
	/* same range checks as before, so bad input still traps here */
	BUG_ON(list_num < 0);
	BUG_ON(list_num > 2);
	return dlm_list_idx_to_ptr(res, list_num);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) /* TODO: do ast flush business
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)  * TODO: do MIGRATING and RECOVERING spinning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 
/*
 * NOTE about in-flight requests during migration:
 *
 * Before attempting the migrate, the master has marked the lockres as
 * MIGRATING and then flushed all of its pending ASTs.  So any in-flight
 * request either got queued before the MIGRATING flag was set, in which
 * case the lock data will reflect the change and a return message is on
 * the way, or it failed to get in before MIGRATING was set.  In the
 * latter case, the caller will be told to spin and wait for the MIGRATING
 * flag to be dropped, then recheck the master.
 * This holds true for the convert, cancel and unlock cases, and since lvb
 * updates are tied to these same messages, it applies to lvb updates as
 * well.  For the lock case, there is no way a lock can be on the master
 * queue and not be on the secondary queue since the lock is always added
 * locally first.  This means that the new target node will never be sent
 * a lock that it doesn't already have on the list.
 * In total, this means that the local lock is correct and should not be
 * updated to match the one sent by the master.  Any messages sent back
 * from the master before the MIGRATING flag was set will bring the lock
 * properly up-to-date, and the change will be ordered properly for the
 * waiter.  We will *not* attempt to modify the lock underneath the waiter.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 
/*
 * dlm_process_recovery_data() - apply the lock entries carried in one
 * migratable-lockres message to a local lock resource.
 * @dlm:  domain the lock resource belongs to
 * @res:  local lock resource being rebuilt
 * @mres: received message; mres->ml[0 .. num_locks-1] are the lock entries
 *
 * Each entry is handled in one of three ways:
 *   - a dummy lock (as decided by dlm_is_dummy_lock()) only sets the
 *     sending node's refmap bit on @res and ends the loop;
 *   - an entry owned by this node must already exist on one of @res's
 *     queues (BUG otherwise).  It is looked up by cookie and, when found on
 *     the queue named by the entry, moved to the tail of that queue; when
 *     found on a different queue it is only logged and left in place;
 *   - an entry owned by another node gets a freshly allocated dlm_lock that
 *     is attached to @res, has convert type and LVB flags copied in, may
 *     update the lockres LVB, and is finally linked onto the target queue
 *     unless a lock with the same cookie is already there.
 *
 * Whatever happens, the inflight reference on @res is dropped before
 * returning.
 *
 * Returns 0 on success or -ENOMEM when dlm_new_lock() fails; in the latter
 * case the remaining entries of @mres are not processed.
 */
static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     struct dlm_migratable_lockres *mres)
{
	struct dlm_migratable_lock *ml;
	struct list_head *queue, *iter;
	struct list_head *tmpq = NULL;
	struct dlm_lock *newlock = NULL;
	struct dlm_lockstatus *lksb = NULL;
	int ret = 0;
	int i, j, bad;
	struct dlm_lock *lock;
	u8 from = O2NM_MAX_NODES;
	__be64 c;

	mlog(0, "running %d locks for this lockres\n", mres->num_locks);
	for (i=0; i<mres->num_locks; i++) {
		ml = &(mres->ml[i]);

		if (dlm_is_dummy_lock(dlm, ml, &from)) {
			/* placeholder, just need to set the refmap bit */
			BUG_ON(mres->num_locks != 1);
			mlog(0, "%s:%.*s: dummy lock for %u\n",
			     dlm->name, mres->lockname_len, mres->lockname,
			     from);
			spin_lock(&res->spinlock);
			dlm_lockres_set_refmap_bit(dlm, res, from);
			spin_unlock(&res->spinlock);
			break;
		}
		BUG_ON(ml->highest_blocked != LKM_IVMODE);
		newlock = NULL;
		lksb = NULL;

		/* queue the sender says this lock lives on; dlm_list_num_to_pointer()
		 * BUGs when ml->list is outside 0..2 */
		queue = dlm_list_num_to_pointer(res, ml->list);
		tmpq = NULL;

		/* if the lock is for the local node it needs to
		 * be moved to the proper location within the queue.
		 * do not allocate a new lock structure. */
		if (ml->node == dlm->node_num) {
			/* MIGRATION ONLY! */
			BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));

			/* search all three queues for the cookie; on a hit,
			 * tmpq and j identify the queue the lock is on now */
			lock = NULL;
			spin_lock(&res->spinlock);
			for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
				tmpq = dlm_list_idx_to_ptr(res, j);
				list_for_each(iter, tmpq) {
					lock = list_entry(iter,
						  struct dlm_lock, list);
					if (lock->ml.cookie == ml->cookie)
						break;
					lock = NULL;
				}
				if (lock)
					break;
			}

			/* lock is always created locally first, and
			 * destroyed locally last.  it must be on the list */
			if (!lock) {
				c = ml->cookie;
				mlog(ML_ERROR, "Could not find local lock "
					       "with cookie %u:%llu, node %u, "
					       "list %u, flags 0x%x, type %d, "
					       "conv %d, highest blocked %d\n",
				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
				     ml->node, ml->list, ml->flags, ml->type,
				     ml->convert_type, ml->highest_blocked);
				__dlm_print_one_lock_resource(res);
				BUG();
			}

			/* a cookie match whose owner differs from the sent
			 * entry is treated as fatal */
			if (lock->ml.node != ml->node) {
				c = lock->ml.cookie;
				mlog(ML_ERROR, "Mismatched node# in lock "
				     "cookie %u:%llu, name %.*s, node %u\n",
				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
				     res->lockname.len, res->lockname.name,
				     lock->ml.node);
				c = ml->cookie;
				mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
				     "node %u, list %u, flags 0x%x, type %d, "
				     "conv %d, highest blocked %d\n",
				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
				     ml->node, ml->list, ml->flags, ml->type,
				     ml->convert_type, ml->highest_blocked);
				__dlm_print_one_lock_resource(res);
				BUG();
			}

			/* found on a different queue than the sender claims:
			 * log it and leave the local lock where it is */
			if (tmpq != queue) {
				c = ml->cookie;
				mlog(0, "Lock cookie %u:%llu was on list %u "
				     "instead of list %u for %.*s\n",
				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
				     j, ml->list, res->lockname.len,
				     res->lockname.name);
				__dlm_print_one_lock_resource(res);
				spin_unlock(&res->spinlock);
				continue;
			}

			/* see NOTE above about why we do not update
			 * to match the master here */

			/* move the lock to its proper place */
			/* do not alter lock refcount.  switching lists. */
			list_move_tail(&lock->list, queue);
			spin_unlock(&res->spinlock);

			mlog(0, "just reordered a local lock!\n");
			continue;
		}

		/* lock is for another node. */
		newlock = dlm_new_lock(ml->type, ml->node,
				       be64_to_cpu(ml->cookie), NULL);
		if (!newlock) {
			ret = -ENOMEM;
			goto leave;
		}
		lksb = newlock->lksb;
		dlm_lock_attach_lockres(newlock, res);

		/* a pending conversion is only legal on the converting queue */
		if (ml->convert_type != LKM_IVMODE) {
			BUG_ON(queue != &res->converting);
			newlock->ml.convert_type = ml->convert_type;
		}
		/* carry over only the LVB get/put request bits */
		lksb->flags |= (ml->flags &
				(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));

		/* NL-mode locks skip the LVB handling below */
		if (ml->type == LKM_NLMODE)
			goto skip_lvb;

		/*
		 * If the lock is in the blocked list it can't have a valid lvb,
		 * so skip it
		 */
		if (ml->list == DLM_BLOCKED_LIST)
			goto skip_lvb;

		if (!dlm_lvb_is_empty(mres->lvb)) {
			if (lksb->flags & DLM_LKSB_PUT_LVB) {
				/* other node was trying to update
				 * lvb when node died.  recreate the
				 * lksb with the updated lvb. */
				memcpy(lksb->lvb, mres->lvb, DLM_LVB_LEN);
				/* the lock resource lvb update must happen
				 * NOW, before the spinlock is dropped.
				 * we no longer wait for the AST to update
				 * the lvb. */
				/* NOTE(review): this function does not hold
				 * res->spinlock here (it is first taken below
				 * skip_lvb), so the wording above does not
				 * match the code as written -- confirm what
				 * serialises writers of res->lvb. */
				memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
			} else {
				/* otherwise, the node is sending its
				 * most recent valid lvb info */
				BUG_ON(ml->type != LKM_EXMODE &&
				       ml->type != LKM_PRMODE);
				/* a non-empty local lvb must not be overwritten
				 * by an EX holder's copy or by a differing
				 * copy: dump both and BUG */
				if (!dlm_lvb_is_empty(res->lvb) &&
 				    (ml->type == LKM_EXMODE ||
 				     memcmp(res->lvb, mres->lvb, DLM_LVB_LEN))) {
 					/* NOTE(review): this 'i' shadows the
 					 * outer loop counter; harmless only
 					 * because BUG() follows. */
 					int i;
 					mlog(ML_ERROR, "%s:%.*s: received bad "
 					     "lvb! type=%d\n", dlm->name,
 					     res->lockname.len,
 					     res->lockname.name, ml->type);
 					printk("lockres lvb=[");
 					for (i=0; i<DLM_LVB_LEN; i++)
 						printk("%02x", res->lvb[i]);
 					printk("]\nmigrated lvb=[");
 					for (i=0; i<DLM_LVB_LEN; i++)
 						printk("%02x", mres->lvb[i]);
 					printk("]\n");
 					dlm_print_one_lock_resource(res);
 					BUG();
				}
				memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
			}
		}
skip_lvb:

		/* NOTE:
		 * wrt lock queue ordering and recovery:
		 *    1. order of locks on granted queue is
		 *       meaningless.
		 *    2. order of locks on converting queue is
		 *       LOST with the node death.  sorry charlie.
		 *    3. order of locks on the blocked queue is
		 *       also LOST.
		 * order of locks does not affect integrity, it
		 * just means that a lock request may get pushed
		 * back in line as a result of the node death.
		 * also note that for a given node the lock order
		 * for its secondary queue locks is preserved
		 * relative to each other, but clearly *not*
		 * preserved relative to locks from other nodes.
		 */
		/* refuse to queue a second lock with the same cookie on the
		 * target queue; only 'queue' is scanned, not the other two */
		bad = 0;
		spin_lock(&res->spinlock);
		list_for_each_entry(lock, queue, list) {
			if (lock->ml.cookie == ml->cookie) {
				c = lock->ml.cookie;
				mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
				     "exists on this lockres!\n", dlm->name,
				     res->lockname.len, res->lockname.name,
				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
				     dlm_get_lock_cookie_seq(be64_to_cpu(c)));

				mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
				     "node=%u, cookie=%u:%llu, queue=%d\n",
	      			     ml->type, ml->convert_type, ml->node,
				     dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)),
				     dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)),
				     ml->list);

				__dlm_print_one_lock_resource(res);
				bad = 1;
				break;
			}
		}
		/* NOTE(review): when 'bad' is set, newlock is neither queued
		 * nor released anywhere in this function; this looks like a
		 * leak of the lock and of its lockres attachment -- confirm
		 * against the dlm_new_lock()/dlm_lock_get() refcount rules. */
		if (!bad) {
			dlm_lock_get(newlock);
			if (mres->flags & DLM_MRES_RECOVERY &&
					ml->list == DLM_CONVERTING_LIST &&
					newlock->ml.type >
					newlock->ml.convert_type) {
				/* newlock is doing downconvert, add it to the
				 * head of converting list */
				list_add(&newlock->list, queue);
			} else
				list_add_tail(&newlock->list, queue);
			mlog(0, "%s:%.*s: added lock for node %u, "
			     "setting refmap bit\n", dlm->name,
			     res->lockname.len, res->lockname.name, ml->node);
			dlm_lockres_set_refmap_bit(dlm, res, ml->node);
		}
		spin_unlock(&res->spinlock);
	}
	mlog(0, "done running all the locks\n");

leave:
	/* balance the ref taken when the work was queued */
	spin_lock(&res->spinlock);
	dlm_lockres_drop_inflight_ref(dlm, res);
	spin_unlock(&res->spinlock);

	if (ret < 0)
		mlog_errno(ret);

	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 				       struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 	struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 	struct dlm_lock *lock, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	res->state |= DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	if (!list_empty(&res->recovering)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 		mlog(0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 		     "Recovering res %s:%.*s, is already on recovery list!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 		     dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 		list_del_init(&res->recovering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 		dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	/* We need to hold a reference while on the recovery list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 	dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 	list_add_tail(&res->recovering, &dlm->reco.resources);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	/* find any pending locks and put them back on proper list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 		queue = dlm_list_idx_to_ptr(res, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 		list_for_each_entry_safe(lock, next, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 			dlm_lock_get(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 			if (lock->convert_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 				/* move converting lock back to granted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 				mlog(0, "node died with convert pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 				     "on %.*s. move back to granted list.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 				     res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 				dlm_revert_pending_convert(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 				lock->convert_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 			} else if (lock->lock_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 				/* remove pending lock requests completely */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 				BUG_ON(i != DLM_BLOCKED_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 				mlog(0, "node died with lock pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 				     "on %.*s. remove from blocked list and skip.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 				     res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 				/* lock will be floating until ref in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 				 * dlmlock_remote is freed after the network
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 				 * call returns.  ok for it to not be on any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 				 * list since no ast can be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 				 * (the master is dead). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 				dlm_revert_pending_lock(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 				lock->lock_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 			} else if (lock->unlock_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 				/* if an unlock was in progress, treat as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 				 * if this had completed successfully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 				 * before sending this lock state to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 				 * new master.  note that the dlm_unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 				 * call is still responsible for calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 				 * the unlockast.  that will happen after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 				 * the network call times out.  for now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 				 * just move lists to prepare the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 				 * recovery master.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 				BUG_ON(i != DLM_GRANTED_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 				mlog(0, "node died with unlock pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 				     "on %.*s. remove from blocked list and skip.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 				     res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 				dlm_commit_pending_unlock(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 				lock->unlock_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 			} else if (lock->cancel_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 				/* if a cancel was in progress, treat as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 				 * if this had completed successfully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 				 * before sending this lock state to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 				 * new master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 				BUG_ON(i != DLM_CONVERTING_LIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 				mlog(0, "node died with cancel pending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 				     "on %.*s. move back to granted list.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 				     res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 				dlm_commit_pending_cancel(res, lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 				lock->cancel_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) /* removes all recovered locks from the recovery list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)  * sets the res->owner to the new master.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)  * unsets the RECOVERY flag and wakes waiters. */
/*
 * dlm_finish_local_lockres_recovery() - hand recovered lock resources over
 * to @new_master.
 * @dlm:        domain whose resources are being finished.
 * @dead_node:  node number whose resources were being recovered.
 * @new_master: node number installed as the owner of those resources.
 *
 * The caller must hold dlm->spinlock (asserted below).  Each res->spinlock
 * is taken and released inside this function, nested under dlm->spinlock.
 *
 * Work is done in two passes:
 *
 *   1) Walk dlm->reco.resources.  Every entry still owned by @dead_node is
 *      unlinked from the list, re-owned, has DLM_LOCK_RES_RECOVERING
 *      cleared, is dirtied when it still carries locks, has its waiters
 *      woken, and finally loses one reference.
 *
 *   2) Walk every hash bucket.  DLM_LOCK_RES_RECOVERY_WAITING is cleared
 *      wherever it is set.  Any resource that still has
 *      DLM_LOCK_RES_RECOVERING set and is owned by @dead_node or by this
 *      node gets the same owner change as in pass 1.
 *
 * NOTE(review): in pass 2 the res->state and res->owner tests are made
 * before res->spinlock is taken; only the modifications run under it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 					      u8 dead_node, u8 new_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	struct hlist_head *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 	struct dlm_lock_resource *res, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 
	/* pass 1: the _safe iterator is needed because entries are unlinked
	 * from the list while it is being walked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 		if (res->owner == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 			mlog(0, "%s: res %.*s, Changing owner from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 			     dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 			     res->owner, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 			list_del_init(&res->recovering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 			spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 			/* new_master has our reference from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 			 * the lock state sent during recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 			dlm_change_lockres_owner(dlm, res, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 			res->state &= ~DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 			if (__dlm_lockres_has_locks(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 				__dlm_dirty_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 			wake_up(&res->wq);
			/* presumably pairs with the reference taken when the
			 * resource was linked onto the recovery list -
			 * TODO confirm against the code that adds it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 			dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 	/* this will become unnecessary eventually, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 	 * for now we need to run the whole hash, clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 	 * the RECOVERING state and set the owner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 	 * if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 		bucket = dlm_lockres_hash(dlm, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 		hlist_for_each_entry(res, bucket, hash_node) {
			/* RECOVERY_WAITING is dropped for every resource that
			 * has it, regardless of who owns the resource */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 			if (res->state & DLM_LOCK_RES_RECOVERY_WAITING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 				spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 				res->state &= ~DLM_LOCK_RES_RECOVERY_WAITING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 				spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 				wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 			if (!(res->state & DLM_LOCK_RES_RECOVERING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 
			/* only resources owned by the dead node or by this
			 * node are re-owned; all others are left untouched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 			if (res->owner != dead_node &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 			    res->owner != dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 
			/* still linked on the recovery list: unlink it and drop
			 * a reference, as pass 1 does */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 			if (!list_empty(&res->recovering)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 				list_del_init(&res->recovering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 				dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 			/* new_master has our reference from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 			 * the lock state sent during recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 			mlog(0, "%s: res %.*s, Changing owner from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 			     dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 			     res->owner, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 			spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 			dlm_change_lockres_owner(dlm, res, new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 			res->state &= ~DLM_LOCK_RES_RECOVERING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 			if (__dlm_lockres_has_locks(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 				__dlm_dirty_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 			wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 
/*
 * dlm_lvb_needs_invalidation() - decide whether @lock makes the lock value
 * block (lvb) untrustworthy.
 * @lock:  lock whose mode (lock->ml.type) is inspected.
 * @local: non-zero selects the "local" rule, zero the other rule.
 *
 * With @local set, the answer is 1 when the lock mode is neither LKM_EXMODE
 * nor LKM_PRMODE.  With @local clear, the answer is 1 only when the lock
 * mode is LKM_EXMODE.
 *
 * Return: 1 if the lvb should be invalidated, 0 otherwise.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 	if (local) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 		if (lock->ml.type != LKM_EXMODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 		    lock->ml.type != LKM_PRMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 	} else if (lock->ml.type == LKM_EXMODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 
/*
 * dlm_revalidate_lvb() - zero the lvb of @res when a node death makes it
 * untrustworthy.
 * @dlm:       domain the resource belongs to.
 * @res:       lock resource to examine.
 * @dead_node: node number that died.
 *
 * Both dlm->spinlock and res->spinlock must be held (asserted below).
 *
 * Which locks are examined depends on who owns @res:
 *   - owned by this node: locks held by @dead_node, non-local rule;
 *   - otherwise:          locks held by this node, local rule.
 * The rules themselves are implemented by dlm_lvb_needs_invalidation().
 *
 * Only the granted and converting queues are scanned.  Every matching lock
 * that needs invalidation has its lksb lvb zeroed, and if any lock matched
 * the resource's own lvb is zeroed as well.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 			       struct dlm_lock_resource *res, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) 	struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 	struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) 	int blank_lvb = 0, local = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 	u8 search_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 	assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 	if (res->owner == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 		/* if this node owned the lockres, and if the dead node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 		 * had an EX when he died, blank out the lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 		search_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 		/* if this is a secondary lockres, and we had no EX or PR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 		 * locks granted, we can no longer trust the lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 		search_node = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 		local = 1;  /* check local state for valid lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 
	/* the blocked queue is deliberately outside this index range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 		queue = dlm_list_idx_to_ptr(res, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 		list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 			if (lock->ml.node == search_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 				if (dlm_lvb_needs_invalidation(lock, local)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 					/* zero the lksb lvb and lockres lvb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 					blank_lvb = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 					memset(lock->lksb->lvb, 0, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 
	/* NOTE(review): the message below is also logged on the local path,
	 * where the trigger is this node's own lock mode rather than an EX
	 * held by the dead node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 	if (blank_lvb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 		mlog(0, "clearing %.*s lvb, dead node %u had EX\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 		     res->lockname.len, res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 		memset(res->lvb, 0, DLM_LVB_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 
/*
 * dlm_free_dead_locks() - drop every lock that @dead_node held on @res.
 * @dlm:       domain the resource belongs to.
 * @res:       lock resource to clean.
 * @dead_node: node number whose locks are removed.
 *
 * Both dlm->spinlock and res->spinlock must be held (asserted below).
 *
 * The granted, converting and blocked queues are each walked in turn.  Every
 * lock whose ml.node equals @dead_node is unlinked and has two references
 * dropped (see the comment in the body for why two).
 *
 * Afterwards:
 *   - if at least one lock was freed, DLM_LOCK_RES_RECOVERY_WAITING is set
 *     on @res and the dead node's refmap bit is cleared; an ML_ERROR message
 *     and a dump of the resource are emitted first if that bit was not set;
 *   - if nothing was freed but the refmap bit is set, the bit is cleared.
 * In every case the resource is marked dirty before returning.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 				struct dlm_lock_resource *res, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	struct dlm_lock *lock, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	unsigned int freed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 	/* this node is the lockres master:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 	 * 1) remove any stale locks for the dead node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	 * 2) if the dead node had an EX when he died, blank out the lvb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 	assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	/* We do two dlm_lock_put(). One for removing from list and the other is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 	 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 	/* TODO: check pending_asts, pending_basts here */
	/* the three loops below are identical apart from the queue walked;
	 * the _safe iterator is required because entries are unlinked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 	list_for_each_entry_safe(lock, next, &res->granted, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 		if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 			list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 			freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 	list_for_each_entry_safe(lock, next, &res->converting, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 		if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 			list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 			freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 	list_for_each_entry_safe(lock, next, &res->blocked, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 		if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 			list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 			dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 			freed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	if (freed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 		mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 		     "dropping ref from lockres\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 		     res->lockname.len, res->lockname.name, freed, dead_node);
		/* locks existed without a matching refmap bit: report it and
		 * dump the resource, then carry on with the cleanup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 		if(!test_bit(dead_node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 			mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 			     "but ref was not set\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 			     res->lockname.len, res->lockname.name, freed, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 			__dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 		res->state |= DLM_LOCK_RES_RECOVERY_WAITING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 		dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 	} else if (test_bit(dead_node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 		mlog(0, "%s:%.*s: dead node %u had a ref, but had "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 		     "no locks and had not purged before dying\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 		     res->lockname.len, res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 		dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 	/* do not kick thread yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 	__dlm_dirty_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 
/*
 * dlm_do_local_recovery_cleanup() - local bookkeeping after @dead_node died.
 * @dlm:       domain to clean up.
 * @dead_node: node number that died.
 *
 * This function does not assert it itself, but it calls helpers that assert
 * dlm->spinlock is held, and its caller in this file invokes it with that
 * lock held.  Each res->spinlock is taken and released per resource here.
 *
 * After purging stale master list entries, every lock resource in the hash
 * is visited:
 *
 *   - recovery lock resources (dlm_is_recovery_lock()): a granted lock held
 *     by the dead node is freed; then the resource is purged if the dead
 *     node owned it and this node was dropping its ref, or the dead node's
 *     refmap bit is cleared if this node owns it;
 *   - all other resources: the lvb is revalidated, then
 *       owner == dead node  -> purge if DLM_LOCK_RES_DROPPING_REF is set,
 *                              otherwise move to the recovery list;
 *       owner == this node  -> free the dead node's locks and recalculate
 *                              usage;
 *       owner unknown       -> clear the dead node's refmap bit if set.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 	struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	struct hlist_head *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	struct hlist_node *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 	struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 	/* purge any stale mles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	dlm_clean_master_list(dlm, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 	 * now clean up all lock resources.  there are two rules:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 	 * 1) if the dead node was the master, move the lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 	 *    to the recovering list.  set the RECOVERING flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 	 *    this lockres needs to be cleaned up before it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 	 *    be used further.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 	 * 2) if this node was the master, remove all locks from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 	 *    each of the lockres queues that were owned by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 	 *    dead node.  once recovery finishes, the dlm thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 	 *    can be kicked again to see if any ASTs or BASTs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 	 *    need to be fired as a result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 		bucket = dlm_lockres_hash(dlm, i);
		/* _safe iteration: presumably because the purge calls below can
		 * unhash res while the bucket is walked - TODO confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 		hlist_for_each_entry_safe(res, tmp, bucket, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358)  			/* always prune any $RECOVERY entries for dead nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)  			 * otherwise hangs can occur during later recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 			if (dlm_is_recovery_lock(res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 						 res->lockname.len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 				spin_lock(&res->spinlock);
				/* the non-_safe iterator is fine here only
				 * because the loop breaks immediately after
				 * unlinking the matching lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 				list_for_each_entry(lock, &res->granted, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 					if (lock->ml.node == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 						mlog(0, "AHA! there was "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 						     "a $RECOVERY lock for dead "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 						     "node %u (%s)!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 						     dead_node, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 						list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 						dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 						/* Can't schedule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 						 * DLM_UNLOCK_FREE_LOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 						 * - do manually */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 						dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 						break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 					}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 
				/* a temporary reference is held across the
				 * purge so res can still be unlocked and its
				 * waiters woken afterwards */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 				if ((res->owner == dead_node) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 							(res->state & DLM_LOCK_RES_DROPPING_REF)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 					dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 					__dlm_do_purge_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 					spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 					wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 					dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 				} else if (res->owner == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 					dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 				spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 			spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 			/* zero the lvb if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 			dlm_revalidate_lvb(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 			if (res->owner == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 				if (res->state & DLM_LOCK_RES_DROPPING_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 					mlog(0, "%s:%.*s: owned by "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 						"dead node %u, this node was "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 						"dropping its ref when master died. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 						"continue, purging the lockres.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 						dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 						res->lockname.name, dead_node);
					/* same get / purge / unlock / wake / put
					 * sequence as the recovery-lock branch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 					dlm_lockres_get(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 					__dlm_do_purge_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 					spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 					wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 					dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 				dlm_move_lockres_to_recovery_list(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 			} else if (res->owner == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 				dlm_free_dead_locks(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 				__dlm_lockres_calc_usage(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 			} else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 				if (test_bit(dead_node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 					mlog(0, "%s:%.*s: dead node %u had a ref, but had "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 						"no locks and had not purged before dying\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 						dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 						res->lockname.name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 					dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 			spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 
/*
 * __dlm_hb_node_down() - process the death of node @idx for domain @dlm.
 * @dlm: domain being updated.
 * @idx: node number that went down.
 *
 * The caller must hold dlm->spinlock (asserted below).
 *
 * Steps, in order:
 *   1) if @idx was the recovery master and DLM_RECO_STATE_FINALIZE is set,
 *      clear that flag and reset the recovery state;
 *   2) if @idx was the node currently joining, clear the joining node;
 *   3) return early if @idx is already absent from live_nodes_map or from
 *      domain_map;
 *   4) otherwise clear @idx from live_nodes_map, run the local recovery
 *      cleanup unless @idx is already in recovery_map, notify attached
 *      heartbeat events, clear @idx from domain_map and exit_domain_map,
 *      wake migration waiters, and finally set @idx in recovery_map.
 *
 * Steps 1 and 2 run even when step 3 returns early.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 	assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 	if (dlm->reco.new_master == idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 		mlog(0, "%s: recovery master %d just died\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 		     dlm->name, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 		if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 			/* finalize1 was reached, so it is safe to clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 			 * the new_master and dead_node.  that recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 			 * is complete. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 			mlog(0, "%s: dead master %d had reached "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 			     "finalize1 state, clearing\n", dlm->name, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 			dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 			__dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 	/* Clean up join state on node death. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 	if (dlm->joining_node == idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 		mlog(0, "Clearing join state for node %u\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 	/* check to see if the node is already considered dead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 	if (!test_bit(idx, dlm->live_nodes_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 		mlog(0, "for domain %s, node %d is already dead. "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 		     "another node likely did recovery already.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 		     dlm->name, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 	/* check to see if we do not care about this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 	if (!test_bit(idx, dlm->domain_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 		/* This also catches the case that we get a node down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 		 * but haven't joined the domain yet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 		mlog(0, "node %u already removed from domain!\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 	clear_bit(idx, dlm->live_nodes_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 	/* make sure local cleanup occurs before the heartbeat events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 	if (!test_bit(idx, dlm->recovery_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 		dlm_do_local_recovery_cleanup(dlm, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 	/* notify anything attached to the heartbeat events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 	dlm_hb_event_notify_attached(dlm, idx, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 	mlog(0, "node %u being removed from domain map!\n", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	clear_bit(idx, dlm->domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 	clear_bit(idx, dlm->exit_domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 	/* wake up migration waiters if a node goes down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 	 * perhaps later we can genericize this for other waiters. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 	wake_up(&dlm->migration_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 
	/* set last, after every cleanup step above has run for this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 	set_bit(idx, dlm->recovery_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 
/*
 * dlm_hb_node_down_cb() - node-down callback for a dlm domain.
 * @node: o2nm node that went down (not used in this function).
 * @idx:  node number that went down.
 * @data: the struct dlm_ctxt this callback was registered for.
 *
 * Does nothing if dlm_grab() fails.  Otherwise, eviction callbacks are fired
 * if @idx is still in the domain map, before dlm->spinlock is taken; the
 * node-down processing itself then runs under dlm->spinlock.  The domain is
 * released with dlm_put() before returning.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 	 * This will notify any dlm users that a node in our domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 	 * went away without notifying us first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 	 */
	/* NOTE(review): domain_map is tested here without dlm->spinlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 	if (test_bit(idx, dlm->domain_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 		dlm_fire_domain_eviction_callbacks(dlm, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 	__dlm_hb_node_down(dlm, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 	set_bit(idx, dlm->live_nodes_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 	/* do NOT notify mle attached to the heartbeat events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 	 * new nodes are not interesting in mastery until joined. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) static void dlm_reco_ast(void *astdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 	struct dlm_ctxt *dlm = astdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 	mlog(0, "ast for recovery lock fired!, this=%u, dlm=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 	     dlm->node_num, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) static void dlm_reco_bast(void *astdata, int blocked_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 	struct dlm_ctxt *dlm = astdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 	mlog(0, "bast for recovery lock fired!, this=%u, dlm=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 	     dlm->node_num, dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 	mlog(0, "unlockast for recovery lock fired!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)  * dlm_pick_recovery_master will continually attempt to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)  * dlmlock() on the special "$RECOVERY" lockres with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545)  * LKM_NOQUEUE flag to get an EX.  every thread that enters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546)  * this function on each node racing to become the recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)  * master will not stop attempting this until either:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)  * a) this node gets the EX (and becomes the recovery master),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549)  * or b) dlm->reco.new_master gets set to some nodenum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)  * != O2NM_INVALID_NODE_NUM (another node will do the reco).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)  * so each time a recovery master is needed, the entire cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552)  * will sync at this point.  if the new master dies, that will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553)  * be detected in dlm_do_recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 	enum dlm_status ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 	struct dlm_lockstatus lksb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 	int status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 	mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	     dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 	memset(&lksb, 0, sizeof(lksb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 	ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 		      DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 		      dlm_reco_ast, dlm, dlm_reco_bast);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 	mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 	     dlm->name, ret, lksb.status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	if (ret == DLM_NORMAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 		mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 		     dlm->name, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 		/* got the EX lock.  check to see if another node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 		 * just became the reco master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 		if (dlm_reco_master_ready(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 			mlog(0, "%s: got reco EX lock, but %u will "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 			     "do the recovery\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 			     dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 			status = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 			status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 			/* see if recovery was already finished elsewhere */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 			spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 			if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 				status = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 				mlog(0, "%s: got reco EX lock, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 				     "node got recovered already\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 				if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 					mlog(ML_ERROR, "%s: new master is %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 					     "but no dead node!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 					     dlm->name, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 					BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 			spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 		/* if this node has actually become the recovery master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 		 * set the master and send the messages to begin recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 		if (!status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 			mlog(0, "%s: dead=%u, this=%u, sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 			     "begin_reco now\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 			     dlm->reco.dead_node, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 			status = dlm_send_begin_reco_message(dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 				      dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 			/* this always succeeds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 			BUG_ON(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 			/* set the new_master to this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 			spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 			dlm_set_reco_master(dlm, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 			spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 		/* recovery lock is a special case.  ast will not get fired,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) 		 * so just go ahead and unlock it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 		ret = dlmunlock(dlm, &lksb, 0, dlm_reco_unlock_ast, dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 		if (ret == DLM_DENIED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) 			mlog(0, "got DLM_DENIED, trying LKM_CANCEL\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 			ret = dlmunlock(dlm, &lksb, LKM_CANCEL, dlm_reco_unlock_ast, dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 		if (ret != DLM_NORMAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 			/* this would really suck. this could only happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 			 * if there was a network error during the unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 			 * because of node death.  this means the unlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 			 * is actually "done" and the lock structure is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 			 * even freed.  we can continue, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 			 * because this specific lock name is special. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 			mlog(ML_ERROR, "dlmunlock returned %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 	} else if (ret == DLM_NOTQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 		mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 		     dlm->name, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 		/* another node is master. wait on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 		 * reco.new_master != O2NM_INVALID_NODE_NUM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 		 * for at most one second */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 		wait_event_timeout(dlm->dlm_reco_thread_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 					 dlm_reco_master_ready(dlm),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 					 msecs_to_jiffies(1000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 		if (!dlm_reco_master_ready(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 			mlog(0, "%s: reco master taking awhile\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 			     dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 			goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 		/* another node has informed this one that it is reco master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 		mlog(0, "%s: reco master %u is ready to recover %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 		     dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 		status = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	} else if (ret == DLM_RECOVERING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 		mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 		     dlm->name, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) 		goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) 		struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 		/* dlmlock returned something other than NOTQUEUED or NORMAL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 		mlog(ML_ERROR, "%s: got %s from dlmlock($RECOVERY), "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 		     "lksb.status=%s\n", dlm->name, dlm_errname(ret),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) 		     dlm_errname(lksb.status));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 		res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 					 DLM_RECOVERY_LOCK_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 		if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 			dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) 			dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 			mlog(ML_ERROR, "recovery lock not found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 	return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 	struct dlm_begin_reco br;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 	int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 	int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 	mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 	dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 	clear_bit(dead_node, iter.node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 	memset(&br, 0, sizeof(br));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 	br.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 	br.dead_node = dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 		if (nodenum == dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 			mlog(0, "not sending begin reco to dead node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 				  "%u\n", dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 		if (nodenum == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 			mlog(0, "not sending begin reco to self\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 		mlog(0, "attempting to send begin reco msg to %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 			  nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 		ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 					 &br, sizeof(br), nodenum, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 		/* negative status is handled ok by caller here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 		if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 			ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 		if (dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 			/* node is down.  not involved in recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 			 * so just keep going */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 			mlog(ML_NOTICE, "%s: node %u was down when sending "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 			     "begin reco msg (%d)\n", dlm->name, nodenum, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) 		 * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 		 * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) 		 * We are handling both for compatibility reasons.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) 		if (ret == -EAGAIN || ret == EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 			mlog(0, "%s: trying to start recovery of node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) 			     "%u, but node %u is waiting for last recovery "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) 			     "to complete, backoff for a bit\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 			     dead_node, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 			msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 			goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 			struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 			/* this is now a serious problem, possibly ENOMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 			 * in the network stack.  must retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 			mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 			mlog(ML_ERROR, "begin reco of dlm %s to node %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 			     "returned %d\n", dlm->name, nodenum, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 			res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 						 DLM_RECOVERY_LOCK_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 			if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 				dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 				dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 				mlog(ML_ERROR, "recovery lock not found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 			/* sleep for a bit in hopes that we can avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 			 * another ENOMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 			msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 			goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 			   void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 	/* ok to return 0, domain has gone away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) 	if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) 		mlog(0, "%s: node %u wants to recover node %u (%u:%u) "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) 		     "but this node is in finalize state, waiting on finalize2\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) 		     dlm->name, br->node_idx, br->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) 		     dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 		spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 		dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 	mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 	     dlm->name, br->node_idx, br->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 	     dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 	dlm_fire_domain_eviction_callbacks(dlm, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 	if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 		if (test_bit(dlm->reco.new_master, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 			mlog(0, "%s: new_master %u died, changing "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 			     "to %u\n", dlm->name, dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 			     br->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 			mlog(0, "%s: new_master %u NOT DEAD, changing "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 			     "to %u\n", dlm->name, dlm->reco.new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 			     br->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 			/* may not have seen the new master as dead yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 	if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 		mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 		     "node %u changing it to %u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 		     dlm->reco.dead_node, br->node_idx, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 	dlm_set_reco_master(dlm, br->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 	dlm_set_reco_dead_node(dlm, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 	if (!test_bit(br->dead_node, dlm->recovery_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 		mlog(0, "recovery master %u sees %u as dead, but this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 		     "node has not yet.  marking %u as dead\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 		     br->node_idx, br->dead_node, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 		if (!test_bit(br->dead_node, dlm->domain_map) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 		    !test_bit(br->dead_node, dlm->live_nodes_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 			mlog(0, "%u not in domain/live_nodes map "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 			     "so setting it in reco map manually\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 			     br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 		/* force the recovery cleanup in __dlm_hb_node_down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 		 * both of these will be cleared in a moment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 		set_bit(br->dead_node, dlm->domain_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 		set_bit(br->dead_node, dlm->live_nodes_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 		__dlm_hb_node_down(dlm, br->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 	dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 	mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 	     dlm->name, br->node_idx, br->dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 	     dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) #define DLM_FINALIZE_STAGE2  0x01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 	struct dlm_finalize_reco fr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 	struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 	int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 	int stage = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 	mlog(0, "finishing recovery for node %s:%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 	     "stage %d\n", dlm->name, dlm->reco.dead_node, stage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 	dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 	spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) stage2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 	memset(&fr, 0, sizeof(fr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 	fr.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 	fr.dead_node = dlm->reco.dead_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 	if (stage == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 		fr.flags |= DLM_FINALIZE_STAGE2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 		if (nodenum == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 		ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 					 &fr, sizeof(fr), nodenum, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) 		if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 			ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 			mlog(ML_ERROR, "Error %d when sending message %u (key "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 			     "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 			     dlm->key, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 			if (dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 				/* this has no effect on this recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 				 * session, so set the status to zero to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 				 * finish out the last recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 				mlog(ML_ERROR, "node %u went down after this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 				     "node finished recovery.\n", nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 				ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 	if (stage == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 		/* reset the node_iter back to the top and send finalize2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 		iter.curnode = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 		stage = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 		goto stage2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 
/*
 * dlm_finalize_reco_handler() - handle a recovery-finalize message.
 *
 * @msg:      incoming message; msg->buf is read as a struct dlm_finalize_reco
 * @len:      not used in this function
 * @data:     the struct dlm_ctxt this handler operates on
 * @ret_data: not used in this function
 *
 * The message carries one of two stages: stage 2 when DLM_FINALIZE_STAGE2
 * is set in fr->flags, stage 1 otherwise.  With dlm->spinlock held, the
 * sender (fr->node_idx) and fr->dead_node are checked against
 * dlm->reco.new_master and dlm->reco.dead_node; any mismatch is a BUG().
 *
 * Stage 1 calls dlm_finish_local_lockres_recovery() and sets
 * DLM_RECO_STATE_FINALIZE in dlm->reco.state.  Stage 2 clears that flag,
 * calls __dlm_reset_recovery() and, after dropping the spinlock,
 * dlm_kick_recovery_thread().  Receiving a stage out of order (stage 1 with
 * the flag already set, or stage 2 without it) is a BUG().
 *
 * Returns 0 on every path, including when dlm_grab() fails.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 			      void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 	struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 	struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 	int stage = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 	/* ok to return 0, domain has gone away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 	if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 	if (fr->flags & DLM_FINALIZE_STAGE2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 		stage = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 
                                               	/*
                                               	 * NOTE: the dlm->reco fields are read here for logging only,
                                               	 * before dlm->spinlock is taken.
                                               	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 	mlog(0, "%s: node %u finalizing recovery stage%d of "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 	     "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 	     fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 	spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 
                                               	/*
                                               	 * Both sanity checks run under dlm->spinlock: the sender must be
                                               	 * the recorded new master and must name the recorded dead node.
                                               	 * Either mismatch is treated as fatal.
                                               	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 	if (dlm->reco.new_master != fr->node_idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 		mlog(ML_ERROR, "node %u sent recovery finalize msg, but node "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 		     "%u is supposed to be the new master, dead=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 		     fr->node_idx, dlm->reco.new_master, fr->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 	if (dlm->reco.dead_node != fr->dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 		mlog(ML_ERROR, "node %u sent recovery finalize msg for dead "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 		     "node %u, but node %u is supposed to be dead\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 		     fr->node_idx, fr->dead_node, dlm->reco.dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 
                                               	/*
                                               	 * stage is only ever 1 or 2 at this point, so there is no default
                                               	 * case.  Each case releases dlm->spinlock itself.
                                               	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 	switch (stage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 		case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 			dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
                                               			/* a second finalize1 without an intervening finalize2 is fatal */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 			if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 				mlog(ML_ERROR, "%s: received finalize1 from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 				     "new master %u for dead node %u, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 				     "this node has already received it!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 				     dlm->name, fr->node_idx, fr->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 				dlm_print_reco_node_status(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 			}
                                               			/* remember that finalize1 was seen so finalize2 can be validated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 			dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 			spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 		case 2:
                                               			/* finalize2 is only valid after finalize1 has set the flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 			if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 				mlog(ML_ERROR, "%s: received finalize2 from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 				     "new master %u for dead node %u, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 				     "this node did not have finalize1!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 				     dlm->name, fr->node_idx, fr->dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 				dlm_print_reco_node_status(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 			dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 			__dlm_reset_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 			spin_unlock(&dlm->spinlock);
                                               			/* the recovery thread is kicked only after the spinlock is dropped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 			dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 
                                               	/* emitted after either stage, and without dlm->spinlock held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) 	mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 	     dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) 
                                               	/* presumably balances the successful dlm_grab() above -- TODO confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) 	dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) }