// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmmod.c
 *
 * standalone DLM module
 *
 * Copyright (C) 2004 Oracle. All rights reserved.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/blkdev.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
#include "../cluster/tcp.h"

#include "dlmapi.h"
#include "dlmcommon.h"
#include "dlmdomain.h"
#include "dlmdebug.h"

#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
#include "../cluster/masklog.h"

static void dlm_mle_node_down(struct dlm_ctxt *dlm,
			      struct dlm_master_list_entry *mle,
			      struct o2nm_node *node,
			      int idx);
static void dlm_mle_node_up(struct dlm_ctxt *dlm,
			    struct dlm_master_list_entry *mle,
			    struct o2nm_node *node,
			    int idx);

static void dlm_assert_master_worker(struct dlm_work_item *item, void *data);
static int dlm_do_assert_master(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res,
				void *nodemap, u32 flags);
static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data);

static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
				struct dlm_master_list_entry *mle,
				const char *name,
				unsigned int namelen)
{
	if (dlm != mle->dlm)
		return 0;

	if (namelen != mle->mnamelen ||
	    memcmp(name, mle->mname, namelen) != 0)
		return 0;

	return 1;
}

static struct kmem_cache *dlm_lockres_cache;
static struct kmem_cache *dlm_lockname_cache;
static struct kmem_cache *dlm_mle_cache;

static void dlm_mle_release(struct kref *kref);
static void dlm_init_mle(struct dlm_master_list_entry *mle,
			 enum dlm_mle_type type,
			 struct dlm_ctxt *dlm,
			 struct dlm_lock_resource *res,
			 const char *name,
			 unsigned int namelen);
static void dlm_put_mle(struct dlm_master_list_entry *mle);
static void __dlm_put_mle(struct dlm_master_list_entry *mle);
static int dlm_find_mle(struct dlm_ctxt *dlm,
			struct dlm_master_list_entry **mle,
			char *name, unsigned int namelen);

static int dlm_do_master_request(struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle, int to);

static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     struct dlm_master_list_entry *mle,
				     int *blocked);
static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res,
				    struct dlm_master_list_entry *mle,
				    int blocked);
static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
				 struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle,
				 struct dlm_master_list_entry **oldmle,
				 const char *name, unsigned int namelen,
				 u8 new_master, u8 master);

static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res);
static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res);
static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res,
				      u8 target);
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res);

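/* map a socket errno onto "the remote node is dead or unreachable";
 * message senders typically treat these as a node-down event rather
 * than a transient failure */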
int dlm_is_host_down(int errno)
{
	switch (errno) {
	case -EBADF:
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -ECONNRESET:
	case -EPIPE:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ETIMEDOUT:
	case -ECONNABORTED:
	case -ENETDOWN:
	case -ENETUNREACH:
	case -ENETRESET:
	case -ESHUTDOWN:
	case -ENOPROTOOPT:
	case -EINVAL:	/* if returned from our tcp code,
			   this means there is no socket */
		return 1;
	}
	return 0;
}

/*
 * MASTER LIST FUNCTIONS
 */

/*
 * regarding master list entries and heartbeat callbacks:
 *
 * in order to avoid sleeping and allocation that occurs in
 * heartbeat, master list entries are simply attached to the
 * dlm's established heartbeat callbacks. the mle is attached
 * when it is created, and since the dlm->spinlock is held at
 * that time, any heartbeat event will be properly discovered
 * by the mle. the mle needs to be detached from the
 * dlm->mle_hb_events list as soon as heartbeat events are no
 * longer useful to the mle, and before the mle is freed.
 *
 * as a general rule, heartbeat events are no longer needed by
 * the mle once an "answer" regarding the lock master has been
 * received.
 */
static inline void __dlm_mle_attach_hb_events(struct dlm_ctxt *dlm,
					      struct dlm_master_list_entry *mle)
{
	assert_spin_locked(&dlm->spinlock);

	list_add_tail(&mle->hb_events, &dlm->mle_hb_events);
}

static inline void __dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
					      struct dlm_master_list_entry *mle)
{
	if (!list_empty(&mle->hb_events))
		list_del_init(&mle->hb_events);
}

static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
					    struct dlm_master_list_entry *mle)
{
	spin_lock(&dlm->spinlock);
	__dlm_mle_detach_hb_events(dlm, mle);
	spin_unlock(&dlm->spinlock);
}

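/*
 * mle reference counting, summarized from the helpers below: the plain
 * kref is taken with dlm_get_mle() and dropped with dlm_put_mle() (or
 * __dlm_put_mle() when dlm->spinlock and dlm->master_lock are already
 * held). the _inuse variants additionally bump mle->inuse, marking the
 * entry as actively referenced so it stays usable while the caller
 * drops and reacquires the spinlocks.
 */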
static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);
	mle->inuse++;
	kref_get(&mle->mle_refs);
}

static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	spin_lock(&dlm->spinlock);
	spin_lock(&dlm->master_lock);
	mle->inuse--;
	__dlm_put_mle(mle);
	spin_unlock(&dlm->master_lock);
	spin_unlock(&dlm->spinlock);
}

/* remove from list and free */
static void __dlm_put_mle(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);
	if (!kref_read(&mle->mle_refs)) {
		/* this may or may not crash, but who cares.
		 * it's a BUG. */
		mlog(ML_ERROR, "bad mle: %p\n", mle);
		dlm_print_one_mle(mle);
		BUG();
	} else
		kref_put(&mle->mle_refs, dlm_mle_release);
}

/* must not have any spinlocks coming in */
static void dlm_put_mle(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	spin_lock(&dlm->spinlock);
	spin_lock(&dlm->master_lock);
	__dlm_put_mle(mle);
	spin_unlock(&dlm->master_lock);
	spin_unlock(&dlm->spinlock);
}

static inline void dlm_get_mle(struct dlm_master_list_entry *mle)
{
	kref_get(&mle->mle_refs);
}

static void dlm_init_mle(struct dlm_master_list_entry *mle,
			 enum dlm_mle_type type,
			 struct dlm_ctxt *dlm,
			 struct dlm_lock_resource *res,
			 const char *name,
			 unsigned int namelen)
{
	assert_spin_locked(&dlm->spinlock);

	mle->dlm = dlm;
	mle->type = type;
	INIT_HLIST_NODE(&mle->master_hash_node);
	INIT_LIST_HEAD(&mle->hb_events);
	memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
	spin_lock_init(&mle->spinlock);
	init_waitqueue_head(&mle->wq);
	atomic_set(&mle->woken, 0);
	kref_init(&mle->mle_refs);
	memset(mle->response_map, 0, sizeof(mle->response_map));
	mle->master = O2NM_MAX_NODES;
	mle->new_master = O2NM_MAX_NODES;
	mle->inuse = 0;

	BUG_ON(mle->type != DLM_MLE_BLOCK &&
	       mle->type != DLM_MLE_MASTER &&
	       mle->type != DLM_MLE_MIGRATION);

	if (mle->type == DLM_MLE_MASTER) {
		BUG_ON(!res);
		mle->mleres = res;
		memcpy(mle->mname, res->lockname.name, res->lockname.len);
		mle->mnamelen = res->lockname.len;
		mle->mnamehash = res->lockname.hash;
	} else {
		BUG_ON(!name);
		mle->mleres = NULL;
		memcpy(mle->mname, name, namelen);
		mle->mnamelen = namelen;
		mle->mnamehash = dlm_lockid_hash(name, namelen);
	}

	atomic_inc(&dlm->mle_tot_count[mle->type]);
	atomic_inc(&dlm->mle_cur_count[mle->type]);

	/* copy off the node_map and register hb callbacks on our copy */
	memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
	memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
	clear_bit(dlm->node_num, mle->vote_map);
	clear_bit(dlm->node_num, mle->node_map);

	/* attach the mle to the domain node up/down events */
	__dlm_mle_attach_hb_events(dlm, mle);
}

void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
{
	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);

	if (!hlist_unhashed(&mle->master_hash_node))
		hlist_del_init(&mle->master_hash_node);
}

void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
{
	struct hlist_head *bucket;

	assert_spin_locked(&dlm->master_lock);

	bucket = dlm_master_hash(dlm, mle->mnamehash);
	hlist_add_head(&mle->master_hash_node, bucket);
}

/* returns 1 if found, 0 if not */
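/* on success a reference is taken on *mle via dlm_get_mle(); the
 * caller is responsible for dropping it with dlm_put_mle() */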
static int dlm_find_mle(struct dlm_ctxt *dlm,
			struct dlm_master_list_entry **mle,
			char *name, unsigned int namelen)
{
	struct dlm_master_list_entry *tmpmle;
	struct hlist_head *bucket;
	unsigned int hash;

	assert_spin_locked(&dlm->master_lock);

	hash = dlm_lockid_hash(name, namelen);
	bucket = dlm_master_hash(dlm, hash);
	hlist_for_each_entry(tmpmle, bucket, master_hash_node) {
		if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
			continue;
		dlm_get_mle(tmpmle);
		*mle = tmpmle;
		return 1;
	}
	return 0;
}

void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
{
	struct dlm_master_list_entry *mle;

	assert_spin_locked(&dlm->spinlock);

	list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
		if (node_up)
			dlm_mle_node_up(dlm, mle, NULL, idx);
		else
			dlm_mle_node_down(dlm, mle, NULL, idx);
	}
}

static void dlm_mle_node_down(struct dlm_ctxt *dlm,
			      struct dlm_master_list_entry *mle,
			      struct o2nm_node *node, int idx)
{
	spin_lock(&mle->spinlock);

	if (!test_bit(idx, mle->node_map))
		mlog(0, "node %u already removed from nodemap!\n", idx);
	else
		clear_bit(idx, mle->node_map);

	spin_unlock(&mle->spinlock);
}

static void dlm_mle_node_up(struct dlm_ctxt *dlm,
			    struct dlm_master_list_entry *mle,
			    struct o2nm_node *node, int idx)
{
	spin_lock(&mle->spinlock);

	if (test_bit(idx, mle->node_map))
		mlog(0, "node %u already in node map!\n", idx);
	else
		set_bit(idx, mle->node_map);

	spin_unlock(&mle->spinlock);
}

int dlm_init_mle_cache(void)
{
	dlm_mle_cache = kmem_cache_create("o2dlm_mle",
					  sizeof(struct dlm_master_list_entry),
					  0, SLAB_HWCACHE_ALIGN,
					  NULL);
	if (!dlm_mle_cache)
		return -ENOMEM;
	return 0;
}

void dlm_destroy_mle_cache(void)
{
	kmem_cache_destroy(dlm_mle_cache);
}

static void dlm_mle_release(struct kref *kref)
{
	struct dlm_master_list_entry *mle;
	struct dlm_ctxt *dlm;

	mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
	dlm = mle->dlm;

	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);

	mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname,
	     mle->type);

	/* remove from list if not already */
	__dlm_unlink_mle(dlm, mle);

	/* detach the mle from the domain node up/down events */
	__dlm_mle_detach_hb_events(dlm, mle);

	atomic_dec(&dlm->mle_cur_count[mle->type]);

	/* NOTE: kfree under spinlock here.
	 * if this is bad, we can move this to a freelist. */
	kmem_cache_free(dlm_mle_cache, mle);
}

/*
 * LOCK RESOURCE FUNCTIONS
 */

int dlm_init_master_caches(void)
{
	dlm_lockres_cache = kmem_cache_create("o2dlm_lockres",
					      sizeof(struct dlm_lock_resource),
					      0, SLAB_HWCACHE_ALIGN, NULL);
	if (!dlm_lockres_cache)
		goto bail;

	dlm_lockname_cache = kmem_cache_create("o2dlm_lockname",
					       DLM_LOCKID_NAME_MAX, 0,
					       SLAB_HWCACHE_ALIGN, NULL);
	if (!dlm_lockname_cache)
		goto bail;

	return 0;
bail:
	dlm_destroy_master_caches();
	return -ENOMEM;
}

void dlm_destroy_master_caches(void)
{
	kmem_cache_destroy(dlm_lockname_cache);
	dlm_lockname_cache = NULL;

	kmem_cache_destroy(dlm_lockres_cache);
	dlm_lockres_cache = NULL;
}

static void dlm_lockres_release(struct kref *kref)
{
	struct dlm_lock_resource *res;
	struct dlm_ctxt *dlm;

	res = container_of(kref, struct dlm_lock_resource, refs);
	dlm = res->dlm;

	/* This should not happen -- all lockres' have a name
	 * associated with them at init time. */
	BUG_ON(!res->lockname.name);

	mlog(0, "destroying lockres %.*s\n", res->lockname.len,
	     res->lockname.name);

	atomic_dec(&dlm->res_cur_count);

	if (!hlist_unhashed(&res->hash_node) ||
	    !list_empty(&res->granted) ||
	    !list_empty(&res->converting) ||
	    !list_empty(&res->blocked) ||
	    !list_empty(&res->dirty) ||
	    !list_empty(&res->recovering) ||
	    !list_empty(&res->purge)) {
		mlog(ML_ERROR,
		     "Going to BUG for resource %.*s."
		     " We're on a list! [%c%c%c%c%c%c%c]\n",
		     res->lockname.len, res->lockname.name,
		     !hlist_unhashed(&res->hash_node) ? 'H' : ' ',
		     !list_empty(&res->granted) ? 'G' : ' ',
		     !list_empty(&res->converting) ? 'C' : ' ',
		     !list_empty(&res->blocked) ? 'B' : ' ',
		     !list_empty(&res->dirty) ? 'D' : ' ',
		     !list_empty(&res->recovering) ? 'R' : ' ',
		     !list_empty(&res->purge) ? 'P' : ' ');

		dlm_print_one_lock_resource(res);
	}

	/* By the time we're ready to blow this guy away, we shouldn't
	 * be on any lists. */
	BUG_ON(!hlist_unhashed(&res->hash_node));
	BUG_ON(!list_empty(&res->granted));
	BUG_ON(!list_empty(&res->converting));
	BUG_ON(!list_empty(&res->blocked));
	BUG_ON(!list_empty(&res->dirty));
	BUG_ON(!list_empty(&res->recovering));
	BUG_ON(!list_empty(&res->purge));

	kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name);

	kmem_cache_free(dlm_lockres_cache, res);
}

void dlm_lockres_put(struct dlm_lock_resource *res)
{
	kref_put(&res->refs, dlm_lockres_release);
}

static void dlm_init_lockres(struct dlm_ctxt *dlm,
			     struct dlm_lock_resource *res,
			     const char *name, unsigned int namelen)
{
	char *qname;

	/* If we memset here, we lose our reference to the kmalloc'd
	 * res->lockname.name, so be sure to init every field
	 * correctly! */

	qname = (char *) res->lockname.name;
	memcpy(qname, name, namelen);

	res->lockname.len = namelen;
	res->lockname.hash = dlm_lockid_hash(name, namelen);

	init_waitqueue_head(&res->wq);
	spin_lock_init(&res->spinlock);
	INIT_HLIST_NODE(&res->hash_node);
	INIT_LIST_HEAD(&res->granted);
	INIT_LIST_HEAD(&res->converting);
	INIT_LIST_HEAD(&res->blocked);
	INIT_LIST_HEAD(&res->dirty);
	INIT_LIST_HEAD(&res->recovering);
	INIT_LIST_HEAD(&res->purge);
	INIT_LIST_HEAD(&res->tracking);
	atomic_set(&res->asts_reserved, 0);
	res->migration_pending = 0;
	res->inflight_locks = 0;
	res->inflight_assert_workers = 0;

	res->dlm = dlm;

	kref_init(&res->refs);

	atomic_inc(&dlm->res_tot_count);
	atomic_inc(&dlm->res_cur_count);

	/* just for consistency */
	spin_lock(&res->spinlock);
	dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
	spin_unlock(&res->spinlock);

	res->state = DLM_LOCK_RES_IN_PROGRESS;

	res->last_used = 0;

	spin_lock(&dlm->track_lock);
	list_add_tail(&res->tracking, &dlm->tracking_list);
	spin_unlock(&dlm->track_lock);

	memset(res->lvb, 0, DLM_LVB_LEN);
	memset(res->refmap, 0, sizeof(res->refmap));
}

struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
					  const char *name,
					  unsigned int namelen)
{
	struct dlm_lock_resource *res = NULL;

	res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
	if (!res)
		goto error;

	res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
	if (!res->lockname.name)
		goto error;

	dlm_init_lockres(dlm, res, name, namelen);
	return res;

error:
	if (res)
		kmem_cache_free(dlm_lockres_cache, res);
	return NULL;
}
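
/* a lockres returned by dlm_new_lockres() carries the initial kref set
 * up in dlm_init_lockres(); callers release it with dlm_lockres_put() */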

void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res, int bit)
{
	assert_spin_locked(&res->spinlock);

	mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len,
	     res->lockname.name, bit, __builtin_return_address(0));

	set_bit(bit, res->refmap);
}

void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res, int bit)
{
	assert_spin_locked(&res->spinlock);

	mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len,
	     res->lockname.name, bit, __builtin_return_address(0));

	clear_bit(bit, res->refmap);
}
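
/*
 * inflight references pin a lockres against purging: dlm_thread will
 * not tear down a resource while inflight_locks is nonzero. grab and
 * drop must be paired, and both expect res->spinlock to be held
 * (summary of the helpers below).
 */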

static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
					    struct dlm_lock_resource *res)
{
	res->inflight_locks++;

	mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name,
	     res->lockname.len, res->lockname.name, res->inflight_locks,
	     __builtin_return_address(0));
}

void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);
	__dlm_lockres_grab_inflight_ref(dlm, res);
}

void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);

	BUG_ON(res->inflight_locks == 0);

	res->inflight_locks--;

	mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name,
	     res->lockname.len, res->lockname.name, res->inflight_locks,
	     __builtin_return_address(0));

	wake_up(&res->wq);
}

void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
					struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);
	res->inflight_assert_workers++;
	mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
	     dlm->name, res->lockname.len, res->lockname.name,
	     res->inflight_assert_workers);
}

static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
					       struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);
	BUG_ON(res->inflight_assert_workers == 0);
	res->inflight_assert_workers--;
	mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
	     dlm->name, res->lockname.len, res->lockname.name,
	     res->inflight_assert_workers);
}

static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
					     struct dlm_lock_resource *res)
{
	spin_lock(&res->spinlock);
	__dlm_lockres_drop_inflight_worker(dlm, res);
	spin_unlock(&res->spinlock);
}

/*
 * lookup a lock resource by name.
 * may already exist in the hashtable.
 * lockid is null terminated
 *
 * if not, allocate enough for the lockres and for
 * the temporary structure used in doing the mastering.
 *
 * also, do a lookup in the dlm->master_list to see
 * if another node has begun mastering the same lock.
 * if so, there should be a block entry in there
 * for this name, and we should *not* attempt to master
 * the lock here. need to wait around for that node
 * to assert_master (or die).
 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) const char *lockid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) int namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) struct dlm_lock_resource *tmpres=NULL, *res=NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) struct dlm_master_list_entry *mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) struct dlm_master_list_entry *alloc_mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) int blocked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) int ret, nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) int tries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) int bit, wait_on_recovery = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) BUG_ON(!lockid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) hash = dlm_lockid_hash(lockid, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) lookup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) if (tmpres) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) spin_lock(&tmpres->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) * Right after dlm spinlock was released, dlm_thread could have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) * purged the lockres. Check if lockres got unhashed. If so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) * start over.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) if (hlist_unhashed(&tmpres->hash_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) spin_unlock(&tmpres->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) dlm_lockres_put(tmpres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) tmpres = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) goto lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) /* Wait on the thread that is mastering the resource */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) __dlm_wait_on_lockres(tmpres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) spin_unlock(&tmpres->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) dlm_lockres_put(tmpres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) tmpres = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) goto lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) /* Wait on the resource purge to complete before continuing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) BUG_ON(tmpres->owner == dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) __dlm_wait_on_lockres_flags(tmpres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) DLM_LOCK_RES_DROPPING_REF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) spin_unlock(&tmpres->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) dlm_lockres_put(tmpres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) tmpres = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) goto lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) /* Grab inflight ref to pin the resource */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) dlm_lockres_grab_inflight_ref(dlm, tmpres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) spin_unlock(&tmpres->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) spin_lock(&dlm->track_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) if (!list_empty(&res->tracking))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) list_del_init(&res->tracking);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) mlog(ML_ERROR, "Resource %.*s not "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) "on the Tracking list\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) spin_unlock(&dlm->track_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) res = tmpres;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) if (!res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) mlog(0, "allocating a new resource\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) /* nothing found and we need to allocate one. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) if (!alloc_mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) res = dlm_new_lockres(dlm, lockid, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (!res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) goto lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) mlog(0, "no lockres found, allocated our own: %p\n", res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) if (flags & LKM_LOCAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) /* caller knows it's safe to assume it's not mastered elsewhere
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) * DONE! return right away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) dlm_change_lockres_owner(dlm, res, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) __dlm_insert_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) dlm_lockres_grab_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) /* lockres still marked IN_PROGRESS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) goto wake_waiters;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) /* check master list to see if another node has started mastering it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) /* if we found a block, wait for lock to be mastered by another node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) if (blocked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) int mig;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (mle->type == DLM_MLE_MASTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) mlog(ML_ERROR, "master entry for nonexistent lock!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) mig = (mle->type == DLM_MLE_MIGRATION);
		/* if there is a migration in progress, let the migration
		 * finish before continuing. we can wait for the absence
		 * of the MIGRATION mle: either the migration finished or
		 * one of the nodes died and the mle was cleaned up.
		 * if there is a BLOCK here, but it already has a master
		 * set, we are too late. the master does not have a ref
		 * for us in the refmap. detach the mle and drop it.
		 * either way, go back to the top and start over. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) if (mig || mle->master != O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) BUG_ON(mig && mle->master == dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) /* we arrived too late. the master does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) * have a ref for us. retry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) mlog(0, "%s:%.*s: late on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) dlm->name, namelen, lockid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) mig ? "MIGRATION" : "BLOCK");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) /* master is known, detach */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (!mig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) /* this is lame, but we can't wait on either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * the mle or lockres waitqueue here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (mig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) goto lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) /* go ahead and try to master lock on this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) mle = alloc_mle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) /* make sure this does not get freed below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) alloc_mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) set_bit(dlm->node_num, mle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) __dlm_insert_mle(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) /* still holding the dlm spinlock, check the recovery map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * to see if there are any nodes that still need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * considered. these will not appear in the mle nodemap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * but they might own this lockres. wait on them. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (bit < O2NM_MAX_NODES) {
			mlog(0, "%s: res %.*s, At least one node (%d) needs "
			     "to recover before lock mastery can begin\n",
			     dlm->name, namelen, (char *)lockid, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) wait_on_recovery = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
	/* at this point there is either a DLM_MLE_BLOCK or a
	 * DLM_MLE_MASTER on the master list, so it's safe to add the
	 * lockres to the hashtable. anyone who finds the lock will
	 * still have to wait on the IN_PROGRESS flag. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) /* finally add the lockres to its hash bucket */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) __dlm_insert_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882)
	/* since this lockres is new it doesn't require the spinlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) __dlm_lockres_grab_inflight_ref(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) /* get an extra ref on the mle in case this is a BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * if so, the creator of the BLOCK may try to put the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) * ref at this time in the assert master handler, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) * need an extra one to keep from a bad ptr deref. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) dlm_get_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
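	/* if the node map changes while waiting for votes below,
	 * dlm_wait_for_lock_mastery() returns an error and we come
	 * back here to resend the master requests. */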
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) redo_request:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) while (wait_on_recovery) {
		/* any cluster changes that occurred after dropping the
		 * dlm spinlock would be detectable by a change on the mle,
		 * so we only need to clear out the recovery map once. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (dlm_is_recovery_lock(lockid, namelen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) mlog(0, "%s: Recovery map is not empty, but must "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) "master $RECOVERY lock now\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) if (!dlm_pre_master_reco_lockres(dlm, res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) wait_on_recovery = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) mlog(0, "%s: waiting 500ms for heartbeat state "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) "change\n", dlm->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) msleep(500);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) dlm_kick_recovery_thread(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) msleep(1000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) dlm_wait_for_recovery(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (bit < O2NM_MAX_NODES) {
			mlog(0, "%s: res %.*s, At least one node (%d) needs "
			     "to recover before lock mastery can begin\n",
			     dlm->name, namelen, (char *)lockid, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) wait_on_recovery = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) wait_on_recovery = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) if (wait_on_recovery)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) dlm_wait_for_node_recovery(dlm, bit, 10000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) /* must wait for lock to be mastered elsewhere */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) if (blocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) goto wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
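	/* ask every node in the vote map whether it masters this lock;
	 * responses are accumulated in the mle's response_map. */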
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) dlm_node_iter_init(mle->vote_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) ret = dlm_do_master_request(res, mle, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) if (mle->master != O2NM_MAX_NODES) {
			/* found a master! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (mle->master <= nodenum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) /* if our master request has not reached the master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) * yet, keep going until it does. this is how the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) * master will know that asserts are needed back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) * the lower nodes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) mlog(0, "%s: res %.*s, Requests only up to %u but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) "master is %u, keep going\n", dlm->name, namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) lockid, nodenum, mle->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) wait:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) /* keep going until the response map includes all nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) wait_on_recovery = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) mlog(0, "%s: res %.*s, Node map changed, redo the master "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) "request now, blocked=%d\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) res->lockname.name, blocked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (++tries > 20) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) mlog(ML_ERROR, "%s: res %.*s, Spinning on "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) "dlm_wait_for_lock_mastery, blocked = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) res->lockname.name, blocked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) dlm_print_one_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) tries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) goto redo_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) res->lockname.name, res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) /* make sure we never continue without this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) BUG_ON(res->owner == O2NM_MAX_NODES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) /* master is known, detach if not already detached */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) /* put the extra ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) dlm_put_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) wake_waiters:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) /* need to free the unused mle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (alloc_mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) kmem_cache_free(dlm_mle_cache, alloc_mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
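/* upper bound on a single wait for an incoming assert_master before
 * the mastery state is rechecked */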
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) #define DLM_MASTERY_TIMEOUT_MS 5000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) struct dlm_master_list_entry *mle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) int *blocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) u8 m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) int ret, bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) int map_changed, voting_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) int assert, sleep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
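	/* loop from here until either another node asserts mastery of
	 * this lock or this node wins the vote and asserts it itself */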
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) recheck:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) assert = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) /* check if another node has already become the owner */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) res->lockname.len, res->lockname.name, res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) /* this will cause the master to re-assert across
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * the whole cluster, freeing up mles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) if (res->owner != dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) ret = dlm_do_master_request(res, mle, res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) /* give recovery a chance to run */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) msleep(500);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) goto recheck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
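	/* snapshot the mle state: node_map is the current set of live
	 * nodes, vote_map is the set we expect votes from, and
	 * response_map is the set that has actually responded */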
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) spin_lock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) m = mle->master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) map_changed = (memcmp(mle->vote_map, mle->node_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) sizeof(mle->vote_map)) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) voting_done = (memcmp(mle->vote_map, mle->response_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) sizeof(mle->vote_map)) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) /* restart if we hit any errors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) if (map_changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) int b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) mlog(0, "%s: %.*s: node map changed, restarting\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) b = (mle->type == DLM_MLE_BLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if ((*blocked && !b) || (!*blocked && b)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) *blocked, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) *blocked = b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) spin_unlock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) mlog(0, "%s:%.*s: restart lock mastery succeeded, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) "rechecking now\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) goto recheck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (!voting_done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) mlog(0, "map not changed and voting not done "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) "for %s:%.*s\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (m != O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) /* another node has done an assert!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) * all done! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) sleep = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) sleep = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) /* have all nodes responded? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) if (voting_done && !*blocked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) if (dlm->node_num <= bit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) /* my node number is lowest.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * now tell other nodes that I am
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * mastering this. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) mle->master = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) /* ref was grabbed in get_lock_resource
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * will be dropped in dlmlock_master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) assert = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) sleep = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) /* if voting is done, but we have not received
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) * an assert master yet, we must sleep */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) spin_unlock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) /* sleep if we haven't finished voting yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (sleep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) atomic_set(&mle->woken, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) (void)wait_event_timeout(mle->wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) (atomic_read(&mle->woken) == 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) if (res->owner == O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) mlog(0, "%s:%.*s: waiting again\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) goto recheck;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) mlog(0, "done waiting, master is %u\n", res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) ret = 0; /* done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) if (assert) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) m = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) mlog(0, "about to master %.*s here, this=%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) res->lockname.len, res->lockname.name, m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) /* This is a failure in the network path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * not in the response to the assert_master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * (any nonzero response is a BUG on this node).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * Most likely a socket just got disconnected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * due to node death. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) /* no longer need to restart lock mastery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * all living nodes have been contacted. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) /* set the lockres owner */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) /* mastery reference obtained either during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) * assert_master_handler or in get_lock_resource */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) dlm_change_lockres_owner(dlm, res, m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
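/*
 * Iterator over the symmetric difference of two node bitmaps: each
 * changed bit is reported along with whether that node came up or
 * went down relative to the original snapshot.
 *
 * Worked example: with orig_bm = {0,1,3} and cur_bm = {0,2,3} the
 * diff is {1,2}; node 1 was set in orig_bm, so it is reported as
 * NODE_DOWN, while node 2 was not, so it is reported as NODE_UP.
 *
 * Minimal usage sketch (handle_change() is hypothetical, shown for
 * illustration only):
 *
 *	struct dlm_bitmap_diff_iter bdi;
 *	enum dlm_node_state_change sc;
 *	int node;
 *
 *	dlm_bitmap_diff_iter_init(&bdi, orig_bm, cur_bm);
 *	while ((node = dlm_bitmap_diff_iter_next(&bdi, &sc)) >= 0)
 *		handle_change(node, sc);
 */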
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) struct dlm_bitmap_diff_iter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) int curnode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) unsigned long *orig_bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) unsigned long *cur_bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) unsigned long diff_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) enum dlm_node_state_change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) NODE_DOWN = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) NODE_NO_CHANGE = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) NODE_UP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) static void dlm_bitmap_diff_iter_init(struct dlm_bitmap_diff_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) unsigned long *orig_bm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) unsigned long *cur_bm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) unsigned long p1, p2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) iter->curnode = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) iter->orig_bm = orig_bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) iter->cur_bm = cur_bm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) for (i = 0; i < BITS_TO_LONGS(O2NM_MAX_NODES); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) p1 = *(iter->orig_bm + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) p2 = *(iter->cur_bm + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) iter->diff_bm[i] = (p1 & ~p2) | (p2 & ~p1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) static int dlm_bitmap_diff_iter_next(struct dlm_bitmap_diff_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) enum dlm_node_state_change *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) if (iter->curnode >= O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) bit = find_next_bit(iter->diff_bm, O2NM_MAX_NODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) iter->curnode+1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) if (bit >= O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) iter->curnode = O2NM_MAX_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) /* if it was there in the original then this node died */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) if (test_bit(bit, iter->orig_bm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) *state = NODE_DOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) *state = NODE_UP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) iter->curnode = bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) return bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) struct dlm_master_list_entry *mle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) int blocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) struct dlm_bitmap_diff_iter bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) enum dlm_node_state_change sc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) mlog(0, "something happened such that the "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) "master process may need to be restarted!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) assert_spin_locked(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) dlm_bitmap_diff_iter_init(&bdi, mle->vote_map, mle->node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) node = dlm_bitmap_diff_iter_next(&bdi, &sc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) while (node >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) if (sc == NODE_UP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) /* a node came up. clear any old vote from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * the response map and set it in the vote map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * then restart the mastery. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) mlog(ML_NOTICE, "node %d up while restarting\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) /* redo the master request, but only for the new node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) mlog(0, "sending request to new node\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) clear_bit(node, mle->response_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) set_bit(node, mle->vote_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) mlog(ML_ERROR, "node down! %d\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) if (blocked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) int lowest = find_next_bit(mle->maybe_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) /* act like it was never there */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) clear_bit(node, mle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) if (node == lowest) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) mlog(0, "expected master %u died"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) " while this node was blocked "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) "waiting on it!\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) lowest = find_next_bit(mle->maybe_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) O2NM_MAX_NODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) lowest+1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) if (lowest < O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) mlog(0, "%s:%.*s:still "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) "blocked. waiting on %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) "now\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) lowest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) /* mle is an MLE_BLOCK, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) * there is now nothing left to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) * block on. we need to return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) * all the way back out and try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * again with an MLE_MASTER.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * dlm_do_local_recovery_cleanup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * has already run, so the mle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) * refcount is ok */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) mlog(0, "%s:%.*s: no "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) "longer blocking. try to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) "master this here\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) mle->type = DLM_MLE_MASTER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) mle->mleres = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) /* now blank out everything, as if we had never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) * contacted anyone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) memset(mle->response_map, 0, sizeof(mle->response_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) /* reset the vote_map to the current node_map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) memcpy(mle->vote_map, mle->node_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) sizeof(mle->node_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) /* put myself into the maybe map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) if (mle->type != DLM_MLE_BLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) set_bit(dlm->node_num, mle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) node = dlm_bitmap_diff_iter_next(&bdi, &sc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * DLM_MASTER_REQUEST_MSG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * returns: 0 on success,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * -errno on a network error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) * on error, the caller should assume the target node is "dead"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) */
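/*
 * A non-error response is folded into the mle as follows: YES records
 * the responder as the master, NO and MAYBE set the node's bit in the
 * response_map (MAYBE also sets maybe_map, marking the node as a
 * mastery candidate), and ERROR causes the request to be resent.
 */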
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) static int dlm_do_master_request(struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) struct dlm_master_list_entry *mle, int to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) struct dlm_ctxt *dlm = mle->dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) struct dlm_master_request request;
	int ret, response = 0, resend;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) memset(&request, 0, sizeof(request));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) request.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) BUG_ON(mle->type == DLM_MLE_MIGRATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) request.namelen = (u8)mle->mnamelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) memcpy(request.name, mle->mname, request.namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) sizeof(request), to, &response);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) if (ret == -ESRCH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) /* should never happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) mlog(ML_ERROR, "TCP stack not ready!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) } else if (ret == -EINVAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) mlog(ML_ERROR, "bad args passed to o2net!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) } else if (ret == -ENOMEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) mlog(ML_ERROR, "out of memory while trying to send "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) "network message! retrying\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) /* this is totally crude */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) msleep(50);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) } else if (!dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /* not a network error. bad. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) mlog_errno(ret);
			mlog(ML_ERROR, "unhandled error!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) /* all other errors should be network errors,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) * and likely indicate node death */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) mlog(ML_ERROR, "link to %d went down!\n", to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) resend = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) spin_lock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) switch (response) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) case DLM_MASTER_RESP_YES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) set_bit(to, mle->response_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) mlog(0, "node %u is the master, response=YES\n", to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) mlog(0, "%s:%.*s: master node %u now knows I have a "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) "reference\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) res->lockname.name, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) mle->master = to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) case DLM_MASTER_RESP_NO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) mlog(0, "node %u not master, response=NO\n", to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) set_bit(to, mle->response_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) case DLM_MASTER_RESP_MAYBE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) mlog(0, "node %u not master, response=MAYBE\n", to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) set_bit(to, mle->response_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) set_bit(to, mle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) case DLM_MASTER_RESP_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) mlog(0, "node %u hit an error, resending\n", to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) resend = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) response = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) mlog(ML_ERROR, "bad response! %u\n", response);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) spin_unlock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (resend) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) /* this is also totally crude */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) msleep(50);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * locks that can be taken here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * dlm->spinlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * res->spinlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * mle->spinlock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * dlm->master_list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) * if possible, TRIM THIS DOWN!!!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) */
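/* in this handler the locks nest in the order: dlm->spinlock, then
 * res->spinlock, then dlm->master_lock, then an mle spinlock */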
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) u8 response = DLM_MASTER_RESP_MAYBE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) char *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) unsigned int namelen, hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) int found, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) int set_maybe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) int dispatch_assert = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) int dispatched = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) return DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
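	/* a node that has not fully joined the domain cannot take part
	 * in mastery decisions; answer NO unconditionally */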
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) if (!dlm_domain_fully_joined(dlm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) name = request->name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) namelen = request->namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) hash = dlm_lockid_hash(name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) if (namelen > DLM_LOCKID_NAME_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) response = DLM_IVBUFLEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) way_up_top:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) res = __dlm_lookup_lockres(dlm, name, namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) /* take care of the easy cases up front */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * Right after dlm spinlock was released, dlm_thread could have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * purged the lockres. Check if lockres got unhashed. If so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * start over.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) if (hlist_unhashed(&res->hash_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) goto way_up_top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) if (res->state & (DLM_LOCK_RES_RECOVERING|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) DLM_LOCK_RES_MIGRATING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) mlog(0, "returning DLM_MASTER_RESP_ERROR since res is "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) "being recovered/migrated\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) response = DLM_MASTER_RESP_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) if (mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) if (res->owner == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) dlm_lockres_set_refmap_bit(dlm, res, request->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) response = DLM_MASTER_RESP_YES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) if (mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) /* this node is the owner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) * there is some extra work that needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) * happen now. the requesting node has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) * caused all nodes up to this one to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) * create mles. this node now needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) * go back and clean those up. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) dispatch_assert = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) } else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) // mlog(0, "node %u is the master\n", res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) if (mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) /* ok, there is no owner. either this node is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) * being blocked, or it is actively trying to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * master this lock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) mlog(ML_ERROR, "lock with no owner should be "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) "in-progress!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) // mlog(0, "lockres is in progress...\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) found = dlm_find_mle(dlm, &tmpmle, name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) if (!found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) mlog(ML_ERROR, "no mle found for this lock!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) set_maybe = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) spin_lock(&tmpmle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (tmpmle->type == DLM_MLE_BLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) // mlog(0, "this node is waiting for "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) // "lockres to be mastered\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) } else if (tmpmle->type == DLM_MLE_MIGRATION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) mlog(0, "node %u is master, but trying to migrate to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) "node %u.\n", tmpmle->master, tmpmle->new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) if (tmpmle->master == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) mlog(ML_ERROR, "no owner on lockres, but this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) "node is trying to migrate it to %u?!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) tmpmle->new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) /* the real master can respond on its own */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) } else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) set_maybe = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) if (tmpmle->master == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) response = DLM_MASTER_RESP_YES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) /* this node will be the owner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * go back and clean the mles on any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * other nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) dispatch_assert = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) dlm_lockres_set_refmap_bit(dlm, res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) request->node_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) // mlog(0, "this node is attempting to "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) // "master lockres\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) response = DLM_MASTER_RESP_MAYBE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) if (set_maybe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) set_bit(request->node_idx, tmpmle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) spin_unlock(&tmpmle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) /* keep the mle attached to heartbeat events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) dlm_put_mle(tmpmle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if (mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) * lockres doesn't exist on this node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) * if there is an MLE_BLOCK, return NO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) * if there is an MLE_MASTER, return MAYBE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) * otherwise, add an MLE_BLOCK, return NO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) found = dlm_find_mle(dlm, &tmpmle, name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (!found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) /* this lockid has never been seen on this node yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) // mlog(0, "no mle found\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) if (!mle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) if (!mle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) response = DLM_MASTER_RESP_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) mlog_errno(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) goto send_response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) goto way_up_top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) // mlog(0, "this is second time thru, already allocated, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) // "add the block.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) set_bit(request->node_idx, mle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) __dlm_insert_mle(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) spin_lock(&tmpmle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) if (tmpmle->master == dlm->node_num) {
			mlog(ML_ERROR, "no lockres, but an mle with this "
			     "node as master!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) if (tmpmle->type == DLM_MLE_BLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) else if (tmpmle->type == DLM_MLE_MIGRATION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) mlog(0, "migration mle was found (%u->%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) tmpmle->master, tmpmle->new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) /* real master can respond on its own */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) response = DLM_MASTER_RESP_NO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) response = DLM_MASTER_RESP_MAYBE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) set_bit(request->node_idx, tmpmle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) spin_unlock(&tmpmle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) if (found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) /* keep the mle attached to heartbeat events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) dlm_put_mle(tmpmle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) send_response:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * __dlm_lookup_lockres() grabbed a reference to this lockres.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * The reference is released by dlm_assert_master_worker() under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * the call to dlm_dispatch_assert_master(). If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * dlm_assert_master_worker() isn't called, we drop it here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) if (dispatch_assert) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) dlm->node_num, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) DLM_ASSERT_MASTER_MLE_CLEANUP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) mlog(ML_ERROR, "failed to dispatch assert master work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) response = DLM_MASTER_RESP_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) dispatched = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) __dlm_lockres_grab_inflight_worker(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) if (res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) if (!dispatched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) return response;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) * DLM_ASSERT_MASTER_MSG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) * NOTE: this can be used for debugging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) * can periodically run all locks owned by this node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) * and re-assert across the cluster...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) static int dlm_do_assert_master(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) void *nodemap, u32 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) struct dlm_assert_master assert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) int to, tmpret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) int reassert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) const char *lockname = res->lockname.name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) unsigned int namelen = res->lockname.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) BUG_ON(namelen > O2NM_MAX_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) res->state |= DLM_LOCK_RES_SETREF_INPROG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) reassert = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) /* note that if this nodemap is empty, it returns 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) dlm_node_iter_init(nodemap, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) while ((to = dlm_node_iter_next(&iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) struct dlm_master_list_entry *mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) mlog(0, "sending assert master to %d (%.*s)\n", to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) namelen, lockname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) memset(&assert, 0, sizeof(assert));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) assert.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) assert.namelen = namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) memcpy(assert.name, lockname, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) assert.flags = cpu_to_be32(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) &assert, sizeof(assert), to, &r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) if (tmpret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) mlog(ML_ERROR, "Error %d when sending message %u (key "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) "0x%x) to node %u\n", tmpret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) DLM_ASSERT_MASTER_MSG, dlm->key, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) if (!dlm_is_host_down(tmpret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) /* a node died. finish out the rest of the nodes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) mlog(0, "link to %d went down!\n", to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) /* any nonzero status return will do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) ret = tmpret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) } else if (r < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /* ok, something horribly messed. kill thyself. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) mlog(ML_ERROR,"during assert master of %.*s to %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) "got %d.\n", namelen, lockname, to, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) if (dlm_find_mle(dlm, &mle, (char *)lockname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) namelen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) dlm_print_one_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) __dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) if (r & DLM_ASSERT_RESPONSE_REASSERT &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) mlog(ML_ERROR, "%.*s: very strange, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) "master MLE but no lockres on %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) namelen, lockname, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) if (r & DLM_ASSERT_RESPONSE_REASSERT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) mlog(0, "%.*s: node %u create mles on other "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) "nodes and requests a re-assert\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) namelen, lockname, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) reassert = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) mlog(0, "%.*s: node %u has a reference to this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) "lockres, set the bit in the refmap\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) namelen, lockname, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) dlm_lockres_set_refmap_bit(dlm, res, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) if (reassert)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) }
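
/*
 * Editor's note: a minimal sketch of how a caller is expected to drive
 * dlm_do_assert_master(), modeled on dlm_assert_master_worker() below.
 * The nodemap is a snapshot of dlm->domain_map with at least this node
 * cleared; the shape of the caller is illustrative only:
 *
 *	unsigned long nodemap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 *	int ret;
 *
 *	spin_lock(&dlm->spinlock);
 *	memcpy(nodemap, dlm->domain_map, sizeof(nodemap));
 *	spin_unlock(&dlm->spinlock);
 *	clear_bit(dlm->node_num, nodemap);	// never assert to self
 *	ret = dlm_do_assert_master(dlm, res, nodemap, 0);
 *	if (ret < 0 && !dlm_is_host_down(ret))
 *		mlog_errno(ret);	// only node death is tolerated
 */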

/*
 * locks that can be taken here:
 * dlm->spinlock
 * res->spinlock
 * mle->spinlock
 * dlm->master_lock
 *
 * if possible, TRIM THIS DOWN!!!
 */
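/*
 * Editor's note on the nesting actually used in this handler
 * (illustrative): dlm->spinlock is the outermost lock, and both
 * dlm->master_lock and mle->spinlock are only ever taken while it is
 * held and dropped before it, e.g.
 *
 *	spin_lock(&dlm->spinlock);
 *	spin_lock(&dlm->master_lock);
 *	...
 *	spin_unlock(&dlm->master_lock);
 *	spin_unlock(&dlm->spinlock);
 *
 * res->spinlock nests inside dlm->spinlock in the main body, but is
 * also taken on its own in the done: path once dlm->spinlock has been
 * dropped.
 */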
int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data)
{
	struct dlm_ctxt *dlm = data;
	struct dlm_master_list_entry *mle = NULL;
	struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf;
	struct dlm_lock_resource *res = NULL;
	char *name;
	unsigned int namelen, hash;
	u32 flags;
	int master_request = 0, have_lockres_ref = 0;
	int ret = 0;

	if (!dlm_grab(dlm))
		return 0;

	name = assert->name;
	namelen = assert->namelen;
	hash = dlm_lockid_hash(name, namelen);
	flags = be32_to_cpu(assert->flags);

	if (namelen > DLM_LOCKID_NAME_MAX) {
		mlog(ML_ERROR, "Invalid name length!");
		goto done;
	}

	spin_lock(&dlm->spinlock);

	if (flags)
		mlog(0, "assert_master with flags: %u\n", flags);

	/* find the MLE */
	spin_lock(&dlm->master_lock);
	if (!dlm_find_mle(dlm, &mle, name, namelen)) {
		/* not an error, could be master just re-asserting */
		mlog(0, "just got an assert_master from %u, but no "
		     "MLE for it! (%.*s)\n", assert->node_idx,
		     namelen, name);
	} else {
		int bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
		if (bit >= O2NM_MAX_NODES) {
			/* not necessarily an error, though less likely.
			 * could be master just re-asserting. */
			mlog(0, "no bits set in the maybe_map, but %u "
			     "is asserting! (%.*s)\n", assert->node_idx,
			     namelen, name);
		} else if (bit != assert->node_idx) {
			if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
				mlog(0, "master %u was found, %u should "
				     "back off\n", assert->node_idx, bit);
			} else {
				/* with the fix for bug 569, a higher node
				 * number winning the mastery will respond
				 * YES to mastery requests, but this node
				 * had no way of knowing. let it pass. */
				mlog(0, "%u is the lowest node, "
				     "%u is asserting. (%.*s) %u must "
				     "have begun after %u won.\n", bit,
				     assert->node_idx, namelen, name, bit,
				     assert->node_idx);
			}
		}
		if (mle->type == DLM_MLE_MIGRATION) {
			if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
				mlog(0, "%s:%.*s: got cleanup assert"
				     " from %u for migration\n",
				     dlm->name, namelen, name,
				     assert->node_idx);
			} else if (!(flags & DLM_ASSERT_MASTER_FINISH_MIGRATION)) {
				mlog(0, "%s:%.*s: got unrelated assert"
				     " from %u for migration, ignoring\n",
				     dlm->name, namelen, name,
				     assert->node_idx);
				__dlm_put_mle(mle);
				spin_unlock(&dlm->master_lock);
				spin_unlock(&dlm->spinlock);
				goto done;
			}
		}
	}
	spin_unlock(&dlm->master_lock);

	/* ok everything checks out with the MLE
	 * now check to see if there is a lockres */
	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
	if (res) {
		spin_lock(&res->spinlock);
		if (res->state & DLM_LOCK_RES_RECOVERING) {
			mlog(ML_ERROR, "%u asserting but %.*s is "
			     "RECOVERING!\n", assert->node_idx, namelen, name);
			goto kill;
		}
		if (!mle) {
			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
			    res->owner != assert->node_idx) {
				mlog(ML_ERROR, "DIE! Mastery assert from %u, "
				     "but current owner is %u! (%.*s)\n",
				     assert->node_idx, res->owner, namelen,
				     name);
				__dlm_print_one_lock_resource(res);
				BUG();
			}
		} else if (mle->type != DLM_MLE_MIGRATION) {
			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
				/* owner is just re-asserting */
				if (res->owner == assert->node_idx) {
					mlog(0, "owner %u re-asserting on "
					     "lock %.*s\n", assert->node_idx,
					     namelen, name);
					goto ok;
				}
				mlog(ML_ERROR, "got assert_master from "
				     "node %u, but %u is the owner! "
				     "(%.*s)\n", assert->node_idx,
				     res->owner, namelen, name);
				goto kill;
			}
			if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
				mlog(ML_ERROR, "got assert from %u, but lock "
				     "with no owner should be "
				     "in-progress! (%.*s)\n",
				     assert->node_idx,
				     namelen, name);
				goto kill;
			}
		} else /* mle->type == DLM_MLE_MIGRATION */ {
			/* should only be getting an assert from new master */
			if (assert->node_idx != mle->new_master) {
				mlog(ML_ERROR, "got assert from %u, but "
				     "new master is %u, and old master "
				     "was %u (%.*s)\n",
				     assert->node_idx, mle->new_master,
				     mle->master, namelen, name);
				goto kill;
			}
		}
ok:
		spin_unlock(&res->spinlock);
	}

	// mlog(0, "woo! got an assert_master from node %u!\n",
	//	     assert->node_idx);
	if (mle) {
		int extra_ref = 0;
		int nn = -1;
		int rr, err = 0;

		spin_lock(&mle->spinlock);
		if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
			extra_ref = 1;
		else {
			/* MASTER mle: if any bits set in the response map
			 * then the calling node needs to re-assert to clear
			 * up nodes that this node contacted */
			while ((nn = find_next_bit(mle->response_map, O2NM_MAX_NODES,
						   nn+1)) < O2NM_MAX_NODES) {
				if (nn != dlm->node_num && nn != assert->node_idx) {
					master_request = 1;
					break;
				}
			}
		}
		mle->master = assert->node_idx;
		atomic_set(&mle->woken, 1);
		wake_up(&mle->wq);
		spin_unlock(&mle->spinlock);

		if (res) {
			int wake = 0;
			spin_lock(&res->spinlock);
			if (mle->type == DLM_MLE_MIGRATION) {
				mlog(0, "finishing off migration of lockres %.*s, "
				     "from %u to %u\n",
				     res->lockname.len, res->lockname.name,
				     dlm->node_num, mle->new_master);
				res->state &= ~DLM_LOCK_RES_MIGRATING;
				wake = 1;
				dlm_change_lockres_owner(dlm, res, mle->new_master);
				BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
			} else {
				dlm_change_lockres_owner(dlm, res, mle->master);
			}
			spin_unlock(&res->spinlock);
			have_lockres_ref = 1;
			if (wake)
				wake_up(&res->wq);
		}

		/* master is known, detach if not already detached.
		 * ensures that only one assert_master call will happen
		 * on this mle. */
		spin_lock(&dlm->master_lock);

		rr = kref_read(&mle->mle_refs);
		if (mle->inuse > 0) {
			if (extra_ref && rr < 3)
				err = 1;
			else if (!extra_ref && rr < 2)
				err = 1;
		} else {
			if (extra_ref && rr < 2)
				err = 1;
			else if (!extra_ref && rr < 1)
				err = 1;
		}
		if (err) {
			mlog(ML_ERROR, "%s:%.*s: got assert master from %u "
			     "that will mess up this node, refs=%d, extra=%d, "
			     "inuse=%d\n", dlm->name, namelen, name,
			     assert->node_idx, rr, extra_ref, mle->inuse);
			dlm_print_one_mle(mle);
		}
		__dlm_unlink_mle(dlm, mle);
		__dlm_mle_detach_hb_events(dlm, mle);
		__dlm_put_mle(mle);
		if (extra_ref) {
			/* the assert master message now balances the extra
			 * ref given by the master / migration request message.
			 * if this is the last put, it will be removed
			 * from the list. */
			__dlm_put_mle(mle);
		}
		spin_unlock(&dlm->master_lock);
	} else if (res) {
		if (res->owner != assert->node_idx) {
			mlog(0, "assert_master from %u, but current "
			     "owner is %u (%.*s), no mle\n", assert->node_idx,
			     res->owner, namelen, name);
		}
	}
	spin_unlock(&dlm->spinlock);

done:
	ret = 0;
	if (res) {
		spin_lock(&res->spinlock);
		res->state |= DLM_LOCK_RES_SETREF_INPROG;
		spin_unlock(&res->spinlock);
		*ret_data = (void *)res;
	}
	dlm_put(dlm);
	if (master_request) {
		mlog(0, "need to tell master to reassert\n");
		/* positive. negative would shoot down the node. */
		ret |= DLM_ASSERT_RESPONSE_REASSERT;
		if (!have_lockres_ref) {
			mlog(ML_ERROR, "strange, got assert from %u, MASTER "
			     "mle present here for %s:%.*s, but no lockres!\n",
			     assert->node_idx, dlm->name, namelen, name);
		}
	}
	if (have_lockres_ref) {
		/* let the master know we have a reference to the lockres */
		ret |= DLM_ASSERT_RESPONSE_MASTERY_REF;
		mlog(0, "%s:%.*s: got assert from %u, need a ref\n",
		     dlm->name, namelen, name, assert->node_idx);
	}
	return ret;

kill:
	/* kill the caller! */
	mlog(ML_ERROR, "Bad message received from another node. Dumping state "
	     "and killing the other node now! This node is OK and can continue.\n");
	__dlm_print_one_lock_resource(res);
	spin_unlock(&res->spinlock);
	spin_lock(&dlm->master_lock);
	if (mle)
		__dlm_put_mle(mle);
	spin_unlock(&dlm->master_lock);
	spin_unlock(&dlm->spinlock);
	*ret_data = (void *)res;
	dlm_put(dlm);
	return -EINVAL;
}

void dlm_assert_master_post_handler(int status, void *data, void *ret_data)
{
	struct dlm_lock_resource *res = (struct dlm_lock_resource *)ret_data;

	if (ret_data) {
		spin_lock(&res->spinlock);
		res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
		spin_unlock(&res->spinlock);
		wake_up(&res->wq);
		dlm_lockres_put(res);
	}
}
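
/*
 * Editor's note: the post handler above pairs with
 * dlm_assert_master_handler(), which stores the lockres in *ret_data
 * with DLM_LOCK_RES_SETREF_INPROG set and a reference held; once the
 * reply is on the wire, this function clears the flag, wakes waiters
 * and drops that reference.
 */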

int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
			       struct dlm_lock_resource *res,
			       int ignore_higher, u8 request_from, u32 flags)
{
	struct dlm_work_item *item;

	item = kzalloc(sizeof(*item), GFP_ATOMIC);
	if (!item)
		return -ENOMEM;

	/* queue up work for dlm_assert_master_worker */
	dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
	item->u.am.lockres = res; /* already have a ref */
	/* can optionally ignore node numbers higher than this node */
	item->u.am.ignore_higher = ignore_higher;
	item->u.am.request_from = request_from;
	item->u.am.flags = flags;

	if (ignore_higher)
		mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
		     res->lockname.name);

	spin_lock(&dlm->work_lock);
	list_add_tail(&item->list, &dlm->work_list);
	spin_unlock(&dlm->work_lock);

	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
	return 0;
}
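
/*
 * Editor's sketch of the deferred-work pattern above from the caller's
 * side, mirroring the send_response path in dlm_master_request_handler():
 * a caller holding res->spinlock hands its lockres reference to the
 * worker on success, and keeps responsibility for it on failure:
 *
 *	spin_lock(&res->spinlock);
 *	ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
 *					 DLM_ASSERT_MASTER_MLE_CLEANUP);
 *	if (ret < 0) {
 *		// dispatch failed: the ref was not handed off, drop it
 *		spin_unlock(&res->spinlock);
 *		dlm_lockres_put(res);
 *	} else {
 *		__dlm_lockres_grab_inflight_worker(dlm, res);
 *		spin_unlock(&res->spinlock);
 *	}
 */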

static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
{
	struct dlm_ctxt *dlm = data;
	int ret = 0;
	struct dlm_lock_resource *res;
	unsigned long nodemap[BITS_TO_LONGS(O2NM_MAX_NODES)];
	int ignore_higher;
	int bit;
	u8 request_from;
	u32 flags;

	dlm = item->dlm;
	res = item->u.am.lockres;
	ignore_higher = item->u.am.ignore_higher;
	request_from = item->u.am.request_from;
	flags = item->u.am.flags;

	spin_lock(&dlm->spinlock);
	memcpy(nodemap, dlm->domain_map, sizeof(nodemap));
	spin_unlock(&dlm->spinlock);

	clear_bit(dlm->node_num, nodemap);
	if (ignore_higher) {
		/* if this is just to clear up mles for nodes below
		 * this node, do not send the message to the original
		 * caller or any node number higher than this */
		clear_bit(request_from, nodemap);
		bit = dlm->node_num;
		while (1) {
			bit = find_next_bit(nodemap, O2NM_MAX_NODES,
					    bit+1);
			if (bit >= O2NM_MAX_NODES)
				break;
			clear_bit(bit, nodemap);
		}
	}
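	/*
	 * Editor's note: the loop above clears every bit strictly above
	 * dlm->node_num; an equivalent sketch using the generic kernel
	 * bitmap helper would be
	 *
	 *	bitmap_clear(nodemap, dlm->node_num + 1,
	 *		     O2NM_MAX_NODES - dlm->node_num - 1);
	 *
	 * it is kept as an explicit loop here.
	 */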

	/*
	 * If we're migrating this lock to someone else, we are no
	 * longer allowed to assert our own mastery. OTOH, we need to
	 * prevent migration from starting while we're still asserting
	 * our dominance. The reserved ast delays migration.
	 */
	spin_lock(&res->spinlock);
	if (res->state & DLM_LOCK_RES_MIGRATING) {
		mlog(0, "Someone asked us to assert mastery, but we're "
		     "in the middle of migration. Skipping assert, "
		     "the new master will handle that.\n");
		spin_unlock(&res->spinlock);
		goto put;
	} else
		__dlm_lockres_reserve_ast(res);
	spin_unlock(&res->spinlock);
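	/*
	 * Editor's note (an assumption about the ast accounting, see
	 * dlmcommon.h): __dlm_lockres_reserve_ast() raises
	 * res->asts_reserved and dlm_lockres_release_ast() at the end of
	 * this function drops it again; migration waits for that count
	 * to return to zero, which is what delays it while the assert is
	 * in flight.
	 */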

	/* this call now finishes out the nodemap
	 * even if one or more nodes die */
	mlog(0, "worker about to master %.*s here, this=%u\n",
	     res->lockname.len, res->lockname.name, dlm->node_num);
	ret = dlm_do_assert_master(dlm, res, nodemap, flags);
	if (ret < 0) {
		/* no need to restart, we are done */
		if (!dlm_is_host_down(ret))
			mlog_errno(ret);
	}

	/* Ok, we've asserted ourselves. Let's let migration start. */
	dlm_lockres_release_ast(dlm, res);

put:
	dlm_lockres_drop_inflight_worker(dlm, res);

	dlm_lockres_put(res);

	mlog(0, "finished with dlm_assert_master_worker\n");
}

/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
 * We cannot wait for node recovery to complete to begin mastering this
 * lockres because this lockres is used to kick off recovery! ;-)
 * So, do a pre-check on all living nodes to see if any of those nodes
 * think that $RECOVERY is currently mastered by a dead node. If so,
 * we wait a short time to allow that node to get notified by its own
 * heartbeat stack, then check again. All $RECOVERY lock resources
 * mastered by dead nodes are purged when the heartbeat callback is
 * fired, so we can know for sure that it is safe to continue once
 * the requery returns a live node or no node. */
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res)
{
	struct dlm_node_iter iter;
	int nodenum;
	int ret = 0;
	u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;

	spin_lock(&dlm->spinlock);
	dlm_node_iter_init(dlm->domain_map, &iter);
	spin_unlock(&dlm->spinlock);

	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
		/* do not send to self */
		if (nodenum == dlm->node_num)
			continue;
		ret = dlm_do_master_requery(dlm, res, nodenum, &master);
		if (ret < 0) {
			mlog_errno(ret);
			if (!dlm_is_host_down(ret))
				BUG();
			/* host is down, so answer for that node would be
			 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
			ret = 0;
		}

		if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
			/* check to see if this master is in the recovery map */
			spin_lock(&dlm->spinlock);
			if (test_bit(master, dlm->recovery_map)) {
				mlog(ML_NOTICE, "%s: node %u has not seen "
				     "node %u go down yet, and thinks the "
				     "dead node is mastering the recovery "
				     "lock. must wait.\n", dlm->name,
				     nodenum, master);
				ret = -EAGAIN;
			}
			spin_unlock(&dlm->spinlock);
			mlog(0, "%s: reco lock master is %u\n", dlm->name,
			     master);
			break;
		}
	}
	return ret;
}
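
/*
 * Editor's sketch of the retry loop a caller of
 * dlm_pre_master_reco_lockres() needs, per the comment above (the
 * actual caller lives earlier in this file; the 100ms delay here is
 * illustrative):
 *
 *	while (dlm_pre_master_reco_lockres(dlm, res) == -EAGAIN) {
 *		// some node still thinks a dead node masters $RECOVERY;
 *		// give its heartbeat stack time to notice, then re-check
 *		msleep(100);
 *	}
 */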

/*
 * DLM_DEREF_LOCKRES_MSG
 */

int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
	struct dlm_deref_lockres deref;
	int ret = 0, r;
	const char *lockname;
	unsigned int namelen;

	lockname = res->lockname.name;
	namelen = res->lockname.len;
	BUG_ON(namelen > O2NM_MAX_NAME_LEN);

	memset(&deref, 0, sizeof(deref));
	deref.node_idx = dlm->node_num;
	deref.namelen = namelen;
	memcpy(deref.name, lockname, namelen);

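	/*
	 * Editor's note: o2net_send_message() reports two distinct
	 * results, checked separately below: its return value is the
	 * transport status ("did the message get there"), while r
	 * carries the remote handler's return code ("what did the owner
	 * decide"). Only a negative r means the owner rejected the
	 * deref.
	 */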
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) &deref, sizeof(deref), res->owner, &r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) dlm->name, namelen, lockname, ret, res->owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) else if (r < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) /* BAD. other node says I did not have a ref. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) dlm->name, namelen, lockname, res->owner, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) if (r == -ENOMEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) ret = r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) char *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) unsigned int namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) u8 node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) struct dlm_work_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) int cleared = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) int dispatch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) name = deref->name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) namelen = deref->namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) node = deref->node_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) if (namelen > DLM_LOCKID_NAME_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) mlog(ML_ERROR, "Invalid name length!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) if (deref->node_idx >= O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) mlog(ML_ERROR, "Invalid node number: %u\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) hash = dlm_lockid_hash(name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) if (!res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) dlm->name, namelen, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) if (res->state & DLM_LOCK_RES_SETREF_INPROG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) dispatch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) if (test_bit(node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) dlm_lockres_clear_refmap_bit(dlm, res, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) cleared = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) if (!dispatch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) if (cleared)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) dlm_lockres_calc_usage(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) "but it is already dropped!\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) res->lockname.len, res->lockname.name, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) ret = DLM_DEREF_RESPONSE_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) item = kzalloc(sizeof(*item), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) if (!item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) item->u.dl.deref_res = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) item->u.dl.deref_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) spin_lock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) list_add_tail(&item->list, &dlm->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) spin_unlock(&dlm->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) queue_work(dlm->dlm_worker, &dlm->dispatched_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) return DLM_DEREF_RESPONSE_INPROG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) if (res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) struct dlm_deref_lockres_done *deref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) = (struct dlm_deref_lockres_done *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) char *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) unsigned int namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) u8 node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) name = deref->name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) namelen = deref->namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) node = deref->node_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) if (namelen > DLM_LOCKID_NAME_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) mlog(ML_ERROR, "Invalid name length!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) if (deref->node_idx >= O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) mlog(ML_ERROR, "Invalid node number: %u\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) hash = dlm_lockid_hash(name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) if (!res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) dlm->name, namelen, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) if (!(res->state & DLM_LOCK_RES_DROPPING_REF)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) spin_unlock(&dlm->spinlock);
		mlog(ML_NOTICE, "%s:%.*s: node %u sent deref done, "
		     "but the ref was already dropped!\n", dlm->name,
		     res->lockname.len, res->lockname.name, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)
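	/* DROPPING_REF is set, so the purge was waiting on this ack:
	 * finish it now and wake any waiters on the lockres. */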
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) __dlm_do_purge_lockres(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) if (res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409)
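/*
 * Tell @node that the master has finished clearing its refmap bit for
 * @res, so @node can complete its purge. Send failures are only logged;
 * the worst case appears to be a lockres that lingers until recovery.
 */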
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) static void dlm_drop_lockres_ref_done(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) struct dlm_lock_resource *res, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) struct dlm_deref_lockres_done deref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) int ret = 0, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) const char *lockname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) unsigned int namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) lockname = res->lockname.name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) namelen = res->lockname.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) BUG_ON(namelen > O2NM_MAX_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) memset(&deref, 0, sizeof(deref));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) deref.node_idx = dlm->node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) deref.namelen = namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) memcpy(deref.name, lockname, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) ret = o2net_send_message(DLM_DEREF_LOCKRES_DONE, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) &deref, sizeof(deref), node, &r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) if (ret < 0) {
		mlog(ML_ERROR, "%s: res %.*s, error %d sending DEREF DONE "
		     "to node %u\n", dlm->name, namelen,
		     lockname, ret, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) } else if (r < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) /* ignore the error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) dlm->name, namelen, lockname, node, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440)
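/*
 * Deferred half of the deref handler: wait for any in-flight
 * SETREF_INPROG to finish, clear the sender's refmap bit, then send
 * DEREF_DONE back so the sender can finish purging.
 */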
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) struct dlm_ctxt *dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) u8 node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) u8 cleared = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) dlm = item->dlm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) res = item->u.dl.deref_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) node = item->u.dl.deref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) if (test_bit(node, res->refmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) dlm_lockres_clear_refmap_bit(dlm, res, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) cleared = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) dlm_drop_lockres_ref_done(dlm, res, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) if (cleared) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) mlog(0, "%s:%.*s node %u ref dropped in dispatch\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) dlm->name, res->lockname.len, res->lockname.name, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) dlm_lockres_calc_usage(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) "but it is already dropped!\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) res->lockname.len, res->lockname.name, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) /*
 * A migratable resource is one that is:
 * 1. locally mastered, and
 * 2. has zero local locks, and
 * 3. has one or more non-local locks, or one or more references.
 * Returns 1 if yes, 0 if not.
 */
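/* Example: a lockres mastered here whose only lock is held by a remote
 * node satisfies all three rules; the same lockres with any local lock,
 * or one still in MIGRATING/RECOVERING state, is not migratable. */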
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) enum dlm_lockres_list idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) int nonlocal = 0, node_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) struct list_head *queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) u64 cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) /* delay migration when the lockres is in MIGRATING state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) if (res->state & DLM_LOCK_RES_MIGRATING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498)
	/* delay migration when the lockres is in RECOVERING or
	 * RECOVERY_WAITING state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) if (res->state & (DLM_LOCK_RES_RECOVERING|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) DLM_LOCK_RES_RECOVERY_WAITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) if (res->owner != dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) queue = dlm_list_idx_to_ptr(res, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) if (lock->ml.node != dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) nonlocal++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) cookie = be64_to_cpu(lock->ml.cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) mlog(0, "%s: Not migratable res %.*s, lock %u:%llu on "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) "%s list\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) dlm_get_lock_cookie_node(cookie),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) dlm_get_lock_cookie_seq(cookie),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) dlm_list_in_text(idx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) if (!nonlocal) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) if (node_ref >= O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) mlog(0, "%s: res %.*s, Migratable\n", dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) * DLM_MIGRATE_LOCKRES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540)
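/*
 * Outline of the steps below (summary added for clarity):
 *   1. preallocate the migration buffer (mres) and an mle
 *   2. register a DLM_MLE_MIGRATION mle; -EEXIST means another
 *      process is already migrating this lockres
 *   3. mark the lockres MIGRATING and quiesce asts
 *   4. push the lock state to the target (dlm_send_one_lockres)
 *   5. wait for the target to assert mastery, then set the new owner
 */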
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) struct dlm_lock_resource *res, u8 target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) struct dlm_master_list_entry *mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) struct dlm_master_list_entry *oldmle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) struct dlm_migratable_lockres *mres = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) const char *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) unsigned int namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) int mle_added = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) int wake = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) name = res->lockname.name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) namelen = res->lockname.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) /* preallocate up front. if this fails, abort */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) if (!mres) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) if (!mle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) * clear any existing master requests and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) * add the migration mle to the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) namelen, target, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) /* get an extra reference on the mle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) * otherwise the assert_master from the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) * master will destroy this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) if (ret != -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) dlm_get_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) if (ret == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) mlog(0, "another process is already migrating it\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) mle_added = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) * set the MIGRATING flag and flush asts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * if we fail after this we need to re-dirty the lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) if (dlm_mark_lockres_migrating(dlm, res, target) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) mlog(ML_ERROR, "tried to migrate %.*s to %u, but "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) "the target went down.\n", res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) res->lockname.name, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) res->state &= ~DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616)
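	/* note: reached by fall-through on success as well as via
	 * "goto fail"; oldmle, if any, must be released either way */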
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) if (ret != -EEXIST && oldmle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) /* master is known, detach if not already detached */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) dlm_mle_detach_hb_events(dlm, oldmle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) dlm_put_mle(oldmle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) if (mle_added) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) dlm_put_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) } else if (mle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) mle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) * at this point, we have a migration target, an mle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) * in the master list, and the MIGRATING flag set on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) * the lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) /* now that remote nodes are spinning on the MIGRATING flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) * ensure that all assert_master work is flushed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) flush_workqueue(dlm->dlm_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645)
	/* notify the new node and send all lock state: call
	 * dlm_send_one_lockres with the migration flag. this serves as
	 * notice to the target node that a migration is starting. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) ret = dlm_send_one_lockres(dlm, res, mres, target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) DLM_MRES_MIGRATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) mlog(0, "migration to node %u failed with %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) target, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) /* migration failed, detach and clean up mle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) dlm_put_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) res->state &= ~DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) if (dlm_is_host_down(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) dlm_wait_for_node_death(dlm, target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) DLM_NODE_DEATH_WAIT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) /* at this point, the target sends a message to all nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) * (using dlm_do_migrate_request). this node is skipped since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) * we had to put an mle in the list to begin the process. this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) * node now waits for target to do an assert master. this node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) * will be the last one notified, ensuring that the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) * is complete everywhere. if the target dies while this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) * going on, some nodes could potentially see the target as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) * master, so it is important that my recovery finds the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) * mle and sets the master to UNKNOWN. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) /* wait for new node to assert master */
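	/* wait_event_interruptible_timeout() returns <0 only on a signal;
	 * 0 means the 5s timeout expired with the condition still false,
	 * and >0 means mle->woken was seen before the timeout. */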
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) ret = wait_event_interruptible_timeout(mle->wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) (atomic_read(&mle->woken) == 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) msecs_to_jiffies(5000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) if (ret >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) if (atomic_read(&mle->woken) == 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) res->owner == target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) mlog(0, "%s:%.*s: timed out during migration\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) /* avoid hang during shutdown when migrating lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) * to a node which also goes down */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) if (dlm_is_node_dead(dlm, target)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) mlog(0, "%s:%.*s: expected migration "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) "target %u is no longer up, restarting\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) dlm->name, res->lockname.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) res->lockname.name, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) /* migration failed, detach and clean up mle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) dlm_put_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) res->state &= ~DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) mlog(0, "%s:%.*s: caught signal during migration\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) dlm->name, res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) /* all done, set the owner, clear the flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) dlm_set_lockres_owner(dlm, res, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) res->state &= ~DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) dlm_remove_nonlocal_locks(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) /* master is known, detach if not already detached */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) dlm_put_mle_inuse(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) dlm_lockres_calc_usage(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) /* re-dirty the lockres if we failed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) dlm_kick_thread(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) /* wake up waiters if the MIGRATING flag got set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) * but migration failed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) if (wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) if (mres)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) free_page((unsigned long)mres);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) name, target, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * Should be called only after beginning the domain leave process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) * There should not be any remaining locks on nonlocal lock resources,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) * and there should be no local locks left on locally mastered resources.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) * Called with the dlm spinlock held, may drop it to do migration, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) * will re-acquire before exit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) __must_hold(&dlm->spinlock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) int lock_dropped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) u8 target = O2NM_MAX_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) if (dlm_is_lockres_migratable(dlm, res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) target = dlm_pick_migration_target(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) if (target == O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) lock_dropped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) ret = dlm_migrate_lockres(dlm, res, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) target, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) return lock_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791)
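/*
 * Returns nonzero once @lock has no queued or pending basts. Both
 * dlm->ast_lock and lock->spinlock are taken so the snapshot is
 * consistent, but the answer can grow stale as soon as they drop.
 */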
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) spin_lock(&dlm->ast_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) spin_lock(&lock->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) ret = (list_empty(&lock->bast_list) && !lock->bast_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) spin_unlock(&lock->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) spin_unlock(&dlm->ast_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) u8 mig_target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) int can_proceed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) /* target has died, so make the caller break out of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) * wait_event, but caller must recheck the domain_map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) if (!test_bit(mig_target, dlm->domain_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) can_proceed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) return can_proceed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) ret = !!(res->state & DLM_LOCK_RES_DIRTY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831)
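/*
 * Quiesce @res so DLM_LOCK_RES_MIGRATING can be set: flush pending
 * asts, block new dirtying, and wait until the last reserved ast is
 * released (which is what actually sets the flag). Returns 0 on
 * success, or -EHOSTDOWN if @target left the domain while we waited.
 */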
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) u8 target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) mlog(0, "dlm_mark_lockres_migrating: %.*s, from %u to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) res->lockname.len, res->lockname.name, dlm->node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) /* need to set MIGRATING flag on lockres. this is done by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) * ensuring that all asts have been flushed for this lockres. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) BUG_ON(res->migration_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) res->migration_pending = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) /* strategy is to reserve an extra ast then release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) * it below, letting the release do all of the work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) __dlm_lockres_reserve_ast(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) /* now flush all the pending asts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) dlm_kick_thread(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) /* before waiting on DIRTY, block processes which may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) * try to dirty the lockres before MIGRATING is set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) res->state |= DLM_LOCK_RES_BLOCK_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) /* now wait on any pending asts and the DIRTY state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) dlm_lockres_release_ast(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) mlog(0, "about to wait on migration_wq, dirty=%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
	/* if the extra ref we just put was the final one, this
	 * will pass through immediately. otherwise, we need to wait
	 * for the last ast to finish. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) ret = wait_event_interruptible_timeout(dlm->migration_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) dlm_migration_can_proceed(dlm, res, target),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) msecs_to_jiffies(1000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) mlog(0, "woken again: migrating? %s, dead? %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) test_bit(target, dlm->domain_map) ? "no":"yes");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) mlog(0, "all is well: migrating? %s, dead? %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) test_bit(target, dlm->domain_map) ? "no":"yes");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) if (!dlm_migration_can_proceed(dlm, res, target)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) mlog(0, "trying again...\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) /* did the target go down or die? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) if (!test_bit(target, dlm->domain_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) mlog(ML_ERROR, "aha. migration target %u just went down\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) ret = -EHOSTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895)
	/*
	 * if the target is down, clear DLM_LOCK_RES_BLOCK_DIRTY so another
	 * try can be made; otherwise the MIGRATING state is known to be set,
	 * so drop the now-unneeded state that blocked threads trying to
	 * DIRTY the lockres
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) res->migration_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) * at this point:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) * o there are no pending asts on this lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) * o all processes trying to reserve an ast on this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) * lockres must wait for the MIGRATING flag to clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) /* last step in the migration process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) * original master calls this to free all of the dlm_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) * structures that used to be for other nodes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) struct list_head *queue = &res->granted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) int i, bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) struct dlm_lock *lock, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) BUG_ON(res->owner == dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934)
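	/* res->granted, res->converting and res->blocked are assumed to be
	 * laid out consecutively in struct dlm_lock_resource; the queue++
	 * below relies on that to walk all three lists (hence the 3). */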
	for (i = 0; i < 3; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) list_for_each_entry_safe(lock, next, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) if (lock->ml.node != dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) mlog(0, "putting lock for node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) lock->ml.node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) /* be extra careful */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) BUG_ON(!list_empty(&lock->ast_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) BUG_ON(!list_empty(&lock->bast_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) BUG_ON(lock->ast_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) BUG_ON(lock->bast_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) dlm_lockres_clear_refmap_bit(dlm, res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) lock->ml.node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) list_del_init(&lock->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) /* In a normal unlock, we would have added a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) * DLM_UNLOCK_FREE_LOCK action. Force it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) dlm_lock_put(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) queue++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) bit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) if (bit >= O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) break;
		/* do not clear the local node reference; if a process is
		 * holding it, let that process drop the ref itself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) if (bit != dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) mlog(0, "%s:%.*s: node %u had a ref to this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) "migrating lockres, clearing\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) res->lockname.len, res->lockname.name, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) dlm_lockres_clear_refmap_bit(dlm, res, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) bit++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) * Pick a node to migrate the lock resource to. This function selects a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) * potential target based first on the locks and then on refmap. It skips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) * nodes that are in the process of exiting the domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) enum dlm_lockres_list idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) struct list_head *queue = &res->granted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) struct dlm_lock *lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) int noderef;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) u8 nodenum = O2NM_MAX_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) /* Go through all the locks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) queue = dlm_list_idx_to_ptr(res, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) list_for_each_entry(lock, queue, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) if (lock->ml.node == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) if (test_bit(lock->ml.node, dlm->exit_domain_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) nodenum = lock->ml.node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002)
	/* Go through the refmap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) noderef = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) noderef = find_next_bit(res->refmap, O2NM_MAX_NODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) noderef + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) if (noderef >= O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) if (noderef == dlm->node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) if (test_bit(noderef, dlm->exit_domain_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) nodenum = noderef;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) return nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) /* this is called by the new master once all lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) * data has been received */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) u8 master, u8 new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) struct dlm_node_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) struct dlm_migrate_request migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) int ret, skip, status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) int nodenum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) memset(&migrate, 0, sizeof(migrate));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) migrate.namelen = res->lockname.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) memcpy(migrate.name, res->lockname.name, migrate.namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) migrate.new_master = new_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) migrate.master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) /* send message to all nodes, except the master and myself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) while ((nodenum = dlm_node_iter_next(iter)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) if (nodenum == master ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) nodenum == new_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) /* We could race exit domain. If exited, skip. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) skip = (!test_bit(nodenum, dlm->domain_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) if (skip) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) clear_bit(nodenum, iter->node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) &migrate, sizeof(migrate), nodenum,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) mlog(ML_ERROR, "%s: res %.*s, Error %d send "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) "MIGRATE_REQUEST to node %u\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) migrate.namelen, migrate.name, ret, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) if (!dlm_is_host_down(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) mlog(ML_ERROR, "unhandled error=%d!\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) clear_bit(nodenum, iter->node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) } else if (status < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) mlog(0, "migrate request (node %u) returned %d!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) nodenum, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) ret = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) } else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) /* during the migration request we short-circuited
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) * the mastery of the lockres. make sure we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) * a mastery ref for nodenum */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) mlog(0, "%s:%.*s: need ref for node %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) dlm->name, res->lockname.len, res->lockname.name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) dlm_lockres_set_refmap_bit(dlm, res, nodenum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) mlog(0, "returning ret=%d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) /* if there is an existing mle for this lockres, we now know who the master is.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) * (the one who sent us *this* message) we can clear it up right away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) * since the process that put the mle on the list still has a reference to it,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) * we can unhash it now, set the master and wake the process. as a result,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) * we will have no mle in the list to start with. now we can add an mle for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) * the migration and this should be the only one found for those scanning the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) * list. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) struct dlm_ctxt *dlm = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) struct dlm_lock_resource *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) struct dlm_master_list_entry *mle = NULL, *oldmle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) const char *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) unsigned int namelen, hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) if (!dlm_grab(dlm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) name = migrate->name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) namelen = migrate->namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) hash = dlm_lockid_hash(name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) /* preallocate.. if this fails, abort */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) if (!mle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) /* check for pre-existing lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) res = __dlm_lookup_lockres(dlm, name, namelen, hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) if (res->state & DLM_LOCK_RES_RECOVERING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) /* if all is working ok, this can only mean that we got
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) * a migrate request from a node that we now see as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) * dead. what can we do here? drop it to the floor? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) spin_unlock(&res->spinlock);
			mlog(ML_ERROR, "Got a migrate request, but the "
			     "lockres is marked as recovering!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) ret = -EINVAL; /* need a better solution */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) res->state |= DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) /* ignore status. only nonzero status would BUG. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) ret = dlm_add_migration_mle(dlm, res, mle, &oldmle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) name, namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) migrate->new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) migrate->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) kmem_cache_free(dlm_mle_cache, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) if (oldmle) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) /* master is known, detach if not already detached */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) dlm_mle_detach_hb_events(dlm, oldmle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) dlm_put_mle(oldmle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) if (res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) dlm_put(dlm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) /* must be holding dlm->spinlock and dlm->master_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175)  * when adding a migration mle, we can clear any other mles
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176)  * in the master list because we know with certainty that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177)  * the master is "master". so we remove any old mle from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178)  * the list after setting its master field, and then add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179)  * the new migration mle. this way we adhere to the rule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180)  * of having only one mle for a given lock name at all times. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) struct dlm_master_list_entry *mle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) struct dlm_master_list_entry **oldmle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) const char *name, unsigned int namelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) u8 new_master, u8 master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) int found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) *oldmle = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) assert_spin_locked(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) /* caller is responsible for any ref taken here on oldmle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) found = dlm_find_mle(dlm, oldmle, (char *)name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) if (found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) struct dlm_master_list_entry *tmp = *oldmle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) spin_lock(&tmp->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) if (tmp->type == DLM_MLE_MIGRATION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) if (master == dlm->node_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) /* ah another process raced me to it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) mlog(0, "tried to migrate %.*s, but some "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) "process beat me to it\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) namelen, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) spin_unlock(&tmp->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) /* bad. 2 NODES are trying to migrate! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) mlog(ML_ERROR, "migration error mle: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) "master=%u new_master=%u // request: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) "master=%u new_master=%u // "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) "lockres=%.*s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) tmp->master, tmp->new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) master, new_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) namelen, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) /* this is essentially what assert_master does */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) tmp->master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) atomic_set(&tmp->woken, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) wake_up(&tmp->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) /* remove it so that only one mle will be found */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) __dlm_unlink_mle(dlm, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) __dlm_mle_detach_hb_events(dlm, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) if (tmp->type == DLM_MLE_MASTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) "telling master to get ref "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) "for cleared out mle during "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) "migration\n", dlm->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) namelen, name, master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) spin_unlock(&tmp->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) /* now add a migration mle to the tail of the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) mle->new_master = new_master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) /* the new master will be sending an assert master for this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) * at that point we will get the refmap reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) mle->master = master;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) /* do this for consistency with other mle types */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) set_bit(new_master, mle->maybe_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) __dlm_insert_mle(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255)  * Sets the owner of the lockres associated with the mle to UNKNOWN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) struct dlm_master_list_entry *mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) /* Find the lockres associated to the mle and set its owner to UNK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) mle->mnamehash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) if (res) {
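		/* master_lock must be dropped before taking res->spinlock
		 * to respect lock ordering; the caller restarts its scan
		 * of the master list because of this */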
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) /* move lockres onto recovery list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) dlm_move_lockres_to_recovery_list(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) dlm_lockres_put(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) /* about to get rid of mle, detach from heartbeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) __dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) /* dump the mle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) __dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286)
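/* unlink a migration mle and wake any process waiting on it.
 * the caller must hold dlm->spinlock and dlm->master_lock and
 * is responsible for dropping the final mle reference. */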
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) static void dlm_clean_migration_mle(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) struct dlm_master_list_entry *mle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) __dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) spin_lock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) __dlm_unlink_mle(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) atomic_set(&mle->woken, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) spin_unlock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) wake_up(&mle->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299)
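/* a BLOCK mle matters here only if the dead node is the one that
 * would have become master (the only bit set in maybe_map); in that
 * case the assert_master it is waiting for will never arrive. */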
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) struct dlm_master_list_entry *mle, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) BUG_ON(mle->type != DLM_MLE_BLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) spin_lock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) if (bit != dead_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) mlog(0, "mle found, but dead node %u would not have been "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) "master\n", dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) spin_unlock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) /* Must drop the refcount by one since the assert_master will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) * never arrive. This may result in the mle being unlinked and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) * freed, but there may still be a process waiting in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) * dlmlock path which is fine. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) mlog(0, "node %u was expected master\n", dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) atomic_set(&mle->woken, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) spin_unlock(&mle->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) wake_up(&mle->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) /* Do not need events any longer, so detach from heartbeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) __dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) __dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328)
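/* walk the master hash and clean up every mle affected by the death
 * of dead_node. the caller must hold dlm->spinlock. */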
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) struct dlm_master_list_entry *mle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) struct dlm_lock_resource *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) struct hlist_head *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) struct hlist_node *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) top:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) assert_spin_locked(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) /* clean the master list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) for (i = 0; i < DLM_HASH_BUCKETS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) bucket = dlm_master_hash(dlm, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) BUG_ON(mle->type != DLM_MLE_BLOCK &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) mle->type != DLM_MLE_MASTER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) mle->type != DLM_MLE_MIGRATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) /* MASTER mles are initiated locally. The waiting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) * process will notice the node map change shortly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) * Let that happen as normal. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) if (mle->type == DLM_MLE_MASTER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) /* BLOCK mles are initiated by other nodes. Need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) * clean up if the dead node would have been the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) * master. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) if (mle->type == DLM_MLE_BLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) dlm_clean_block_mle(dlm, mle, dead_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) /* Everything else is a MIGRATION mle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) 			/* The rule for MIGRATION mles is that the master
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) 			 * becomes UNKNOWN if *either* the original or the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) 			 * master dies. All UNKNOWN lockres are sent to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) 			 * whichever node becomes the recovery master. The new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) 			 * master is responsible for determining if there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) 			 * still a master for this lockres, or if it needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) 			 * take over mastery. Either way, this node should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) 			 * expect another message to resolve this. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) if (mle->master != dead_node &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) mle->new_master != dead_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) if (mle->new_master == dead_node && mle->inuse) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) 				mlog(ML_NOTICE, "%s: target %u died during "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) 						"migration from %u, but the mle "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) 						"is still in use, ignore it!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) 						dlm->name, dead_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) 						mle->master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) /* If we have reached this point, this mle needs to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) * removed from the list and freed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) dlm_clean_migration_mle(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) mlog(0, "%s: node %u died during migration from "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) "%u to %u!\n", dlm->name, dead_node, mle->master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) mle->new_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) /* If we find a lockres associated with the mle, we've
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) * hit this rare case that messes up our lock ordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) * If so, we need to drop the master lock so that we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) * take the lockres lock, meaning that we will have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) * restart from the head of list. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) res = dlm_reset_mleres_owner(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) if (res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) /* restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) goto top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) /* This may be the last reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) __dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412)
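/* final step of migration, run on the new master: notify all other
 * live nodes of the new owner, assert mastery to them and then back
 * to the old master, and finally take ownership and clear the
 * MIGRATING state on the lockres. */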
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) u8 old_master)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) struct dlm_node_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418)
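	/* snapshot the domain map, minus the old master and ourselves;
	 * these are the nodes that must be sent the migrate request */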
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) dlm_node_iter_init(dlm->domain_map, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) clear_bit(old_master, iter.node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) clear_bit(dlm->node_num, iter.node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) /* ownership of the lockres is changing. account for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) * mastery reference here since old_master will briefly have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) * a reference after the migration completes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) dlm_lockres_set_refmap_bit(dlm, res, old_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) mlog(0, "now time to do a migrate request to other nodes\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) ret = dlm_do_migrate_request(dlm, res, old_master,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) dlm->node_num, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) goto leave;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) mlog(0, "doing assert master of %.*s to all except the original node\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) res->lockname.len, res->lockname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) 	/* this call works all the way through the nodemap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) 	 * even if one or more nodes die */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) ret = dlm_do_assert_master(dlm, res, iter.node_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) DLM_ASSERT_MASTER_FINISH_MIGRATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) /* no longer need to retry. all living nodes contacted. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) memset(iter.node_map, 0, sizeof(iter.node_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) set_bit(old_master, iter.node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) mlog(0, "doing assert master of %.*s back to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) res->lockname.len, res->lockname.name, old_master);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) ret = dlm_do_assert_master(dlm, res, iter.node_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) DLM_ASSERT_MASTER_FINISH_MIGRATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) mlog(0, "assert master to original master failed "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) "with %d.\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) /* the only nonzero status here would be because of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) * a dead original node. we're done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) /* all done, set the owner, clear the flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) spin_lock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) dlm_set_lockres_owner(dlm, res, dlm->node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) res->state &= ~DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) /* re-dirty it on the new master */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) dlm_kick_thread(dlm, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) leave:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) * LOCKRES AST REFCOUNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) * this is integral to migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) /* to signal intent to call an ast later, reserve one ahead of time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484)  * this should be called only after waiting on the lockres
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485)  * with dlm_wait_on_lockres, and while still holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)  * res->spinlock after that call. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) assert_spin_locked(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) if (res->state & DLM_LOCK_RES_MIGRATING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) __dlm_print_one_lock_resource(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) atomic_inc(&res->asts_reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) }
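
/*
 * a minimal usage sketch of the reservation contract above. this is a
 * hypothetical caller, not code from this file; it assumes the wait
 * helper named in the comment above (__dlm_wait_on_lockres) returns
 * with res->spinlock still held:
 *
 *	spin_lock(&res->spinlock);
 *	__dlm_wait_on_lockres(res);
 *	__dlm_lockres_reserve_ast(res);
 *	spin_unlock(&res->spinlock);
 *	(queue and deliver the ast or bast)
 *	dlm_lockres_release_ast(dlm, res);
 */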
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) * used to drop the reserved ast, either because it went unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) * or because the ast/bast was actually called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) * also, if there is a pending migration on this lockres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) * and this was the last pending ast on the lockres,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) * atomically set the MIGRATING flag before we drop the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) * this is how we ensure that migration can proceed with no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) * asts in progress. note that it is ok if the state of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) * queues is such that a lock should be granted in the future
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) * or that a bast should be fired, because the new master will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) * shuffle the lists on this lockres as soon as it is migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) struct dlm_lock_resource *res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) if (!atomic_dec_and_lock(&res->asts_reserved, &res->spinlock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) if (!res->migration_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) res->migration_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) res->state |= DLM_LOCK_RES_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) spin_unlock(&res->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) wake_up(&res->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) wake_up(&dlm->migration_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) void dlm_force_free_mles(struct dlm_ctxt *dlm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) struct hlist_head *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) struct dlm_master_list_entry *mle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) struct hlist_node *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) 	 * We notified all other nodes that we are exiting the domain and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) 	 * marked the dlm state as DLM_CTXT_LEAVING. If any mles are still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) 	 * around, we force-free them and wake any processes that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) 	 * waiting on the mles.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) spin_lock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) spin_lock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
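	/* the domain map must be completely empty by now; every node,
	 * including this one, has already left */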
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) for (i = 0; i < DLM_HASH_BUCKETS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) bucket = dlm_master_hash(dlm, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) if (mle->type != DLM_MLE_BLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) mlog(ML_ERROR, "bad mle: %p\n", mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) dlm_print_one_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) atomic_set(&mle->woken, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) wake_up(&mle->wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) __dlm_unlink_mle(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) __dlm_mle_detach_hb_events(dlm, mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) __dlm_put_mle(mle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) spin_unlock(&dlm->master_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) spin_unlock(&dlm->spinlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) }