^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* -*- mode: c; c-basic-offset: 8; -*-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * vim: noexpandtab sw=8 ts=8 sts=0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2004, 2005 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/jiffies.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/bio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/configfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/random.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/crc32.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/debugfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/bitmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/ktime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include "heartbeat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include "tcp.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include "nodemanager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "quorum.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include "masklog.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
/*
 * The first heartbeat implementation ran one global thread, which serialized
 * every hb callback invocation.  This global serializing semaphore may only
 * be removed once all callees are known to cope with being called
 * concurrently from multiple hb region threads.
 */
static DECLARE_RWSEM(o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * multiple hb threads are watching multiple regions. A node is live
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * whenever any of the threads sees activity from the node in its region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) static DEFINE_SPINLOCK(o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) static struct list_head o2hb_live_slots[O2NM_MAX_NODES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) static LIST_HEAD(o2hb_node_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * In global heartbeat, we maintain a series of region bitmaps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * - o2hb_region_bitmap allows us to limit the region number to max region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * - o2hb_live_region_bitmap tracks live regions (seen steady iterations).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * heartbeat on it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
/*
 * Kinds of debug buffer.
 * NOTE(review): presumably the values stored in o2hb_debug_buf.db_type;
 * the users are not visible in this part of the file - confirm.
 */
#define O2HB_DB_TYPE_LIVENODES			0
#define O2HB_DB_TYPE_LIVEREGIONS		1
#define O2HB_DB_TYPE_QUORUMREGIONS		2
#define O2HB_DB_TYPE_FAILEDREGIONS		3
#define O2HB_DB_TYPE_REGION_LIVENODES		4
#define O2HB_DB_TYPE_REGION_NUMBER		5
#define O2HB_DB_TYPE_REGION_ELAPSED_TIME	6
#define O2HB_DB_TYPE_REGION_PINNED		7
/*
 * Descriptor for one piece of debug data.
 * NOTE(review): field meanings below are inferred from the names only; the
 * code that fills and reads them is outside this part of the file.
 */
struct o2hb_debug_buf {
	int	db_type;	/* presumably one of O2HB_DB_TYPE_* */
	int	db_size;	/* presumably the size of db_data */
	int	db_len;		/* presumably the number of entries in db_data */
	void	*db_data;	/* data being exposed */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
/* Debug buffer descriptors for the module-wide (not per-region) state. */
static struct o2hb_debug_buf *o2hb_db_livenodes;
static struct o2hb_debug_buf *o2hb_db_liveregions;
static struct o2hb_debug_buf *o2hb_db_quorumregions;
static struct o2hb_debug_buf *o2hb_db_failedregions;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
/* Names used for the heartbeat debug directory and the entries in it. */
#define O2HB_DEBUG_DIR			"o2hb"
#define O2HB_DEBUG_LIVENODES		"livenodes"
#define O2HB_DEBUG_LIVEREGIONS		"live_regions"
#define O2HB_DEBUG_QUORUMREGIONS	"quorum_regions"
#define O2HB_DEBUG_FAILEDREGIONS	"failed_regions"
#define O2HB_DEBUG_REGION_NUMBER	"num"
#define O2HB_DEBUG_REGION_ELAPSED_TIME	"elapsed_time_in_ms"
#define O2HB_DEBUG_REGION_PINNED	"pinned"

/* Dentry of the top-level heartbeat debug directory. */
static struct dentry *o2hb_debug_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) static LIST_HEAD(o2hb_all_regions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) static struct o2hb_callback {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) } o2hb_callbacks[O2HB_NUM_CB];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
/*
 * Heartbeat modes.  O2HB_HEARTBEAT_NUM_MODES is not a mode: it counts the
 * modes, sizes o2hb_heartbeat_mode_desc[] and is the exclusive upper bound
 * accepted by o2hb_global_heartbeat_mode_set().
 */
enum o2hb_heartbeat_modes {
	O2HB_HEARTBEAT_LOCAL		= 0,
	O2HB_HEARTBEAT_GLOBAL,
	O2HB_HEARTBEAT_NUM_MODES,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) static const char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) "local", /* O2HB_HEARTBEAT_LOCAL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) "global", /* O2HB_HEARTBEAT_GLOBAL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) static unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
/*
 * Number of registered callbacks that depend on heartbeat.  Both o2net and
 * o2dlm register this callback, but only o2dlm actually depends on the
 * heartbeat: it does not want heartbeat to stop while a dlm domain is still
 * active.
 */
static unsigned int o2hb_dependent_users;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
/*
 * Global heartbeat mode pins every region when there is at least one
 * dependent user and the quorum region count is <= O2HB_PIN_CUT_OFF.  Every
 * region is unpinned again once the region count exceeds the cut off or the
 * number of dependent users drops to zero.
 */
#define O2HB_PIN_CUT_OFF		3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
/*
 * Local heartbeat mode assumes that the dlm domain name equals the region
 * uuid.  That holds for domains created for the file system, but not
 * necessarily for userdlm domains; this is a known limitation.
 *
 * Global heartbeat mode pins/unpins all o2hb regions instead, which works
 * for file system and userdlm domains alike.
 */
static int o2hb_region_pin(const char *region_uuid);
static void o2hb_region_unpin(const char *region_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) /* Only sets a new threshold if there are no active regions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * No locking or otherwise interesting code is required for reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) * o2hb_dead_threshold as it can't change once regions are active and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) * it's not interesting to anyone until then anyway. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) static void o2hb_dead_threshold_set(unsigned int threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) if (threshold > O2HB_MIN_DEAD_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) if (list_empty(&o2hb_all_regions))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) o2hb_dead_threshold = threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) int ret = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) if (list_empty(&o2hb_all_regions)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) o2hb_heartbeat_mode = hb_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) struct o2hb_node_event {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) struct list_head hn_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) enum o2hb_callback_type hn_event_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) struct o2nm_node *hn_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) int hn_node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) struct o2hb_disk_slot {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) struct o2hb_disk_heartbeat_block *ds_raw_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) u8 ds_node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) u64 ds_last_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) u64 ds_last_generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) u16 ds_equal_samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) u16 ds_changed_samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) struct list_head ds_live_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) /* each thread owns a region.. when we're asked to tear down the region
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) * we ask the thread to stop, who cleans up the region */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) struct o2hb_region {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) struct config_item hr_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) struct list_head hr_all_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) unsigned hr_unclean_stop:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) hr_aborted_start:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) hr_item_pinned:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) hr_item_dropped:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) hr_node_deleted:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) /* protected by the hr_callback_sem */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) struct task_struct *hr_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) unsigned int hr_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) unsigned long long hr_start_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) unsigned int hr_block_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) unsigned int hr_block_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) unsigned int hr_slots_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) unsigned int hr_num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) struct page **hr_slot_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) struct block_device *hr_bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) struct o2hb_disk_slot *hr_slots;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) /* live node map of this region */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) unsigned int hr_region_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) struct dentry *hr_debug_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) struct o2hb_debug_buf *hr_db_livenodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) struct o2hb_debug_buf *hr_db_regnum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) struct o2hb_debug_buf *hr_db_elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) struct o2hb_debug_buf *hr_db_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) /* let the person setting up hb wait for it to return until it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) * has reached a 'steady' state. This will be fixed when we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) * a more complete api that doesn't lead to this sort of fragility. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) atomic_t hr_steady_iterations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) /* terminate o2hb thread if it does not reach steady state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) * (hr_steady_iterations == 0) within hr_unsteady_iterations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) atomic_t hr_unsteady_iterations;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) char hr_dev_name[BDEVNAME_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) unsigned int hr_timeout_ms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) /* randomized as the region goes up and down so that a node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * recognizes a node going up and down in one iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) u64 hr_generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) struct delayed_work hr_write_timeout_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) unsigned long hr_last_timeout_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) /* negotiate timer, used to negotiate extending hb timeout. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) struct delayed_work hr_nego_timeout_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) unsigned long hr_nego_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) /* Used during o2hb_check_slot to hold a copy of the block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * being checked because we temporarily have to zero out the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) * crc field. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) struct o2hb_disk_heartbeat_block *hr_tmp_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /* Message key for negotiate timeout message. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) unsigned int hr_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) struct list_head hr_handler_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) /* last hb status, 0 for success, other value for error. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) int hr_last_hb_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) struct o2hb_bio_wait_ctxt {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) atomic_t wc_num_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) struct completion wc_io_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) int wc_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
/*
 * Delay before timeout negotiation starts: half of the write timeout, so
 * that it always fires before the write timeout does.
 */
#define O2HB_NEGO_TIMEOUT_MS	(O2HB_MAX_WRITE_TIMEOUT_MS/2)

/* Message types of the timeout negotiation protocol. */
enum {
	O2HB_NEGO_TIMEOUT_MSG	= 1,	/* sent to the master by a node whose write hung */
	O2HB_NEGO_APPROVE_MSG	= 2,	/* sent by the master to approve the request */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) struct o2hb_nego_msg {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) u8 node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) static void o2hb_write_timeout(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) int failed, quorum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) struct o2hb_region *reg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) container_of(work, struct o2hb_region,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) hr_write_timeout_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) "milliseconds\n", reg->hr_dev_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) if (o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) set_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) failed = bitmap_weight(o2hb_failed_region_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) O2NM_MAX_REGIONS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) quorum = bitmap_weight(o2hb_quorum_region_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) O2NM_MAX_REGIONS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) quorum, failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) * Fence if the number of failed regions >= half the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) * of quorum regions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) if ((failed << 1) < quorum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) o2quo_disk_timeout();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) static void o2hb_arm_timeout(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) /* Arm writeout only after thread reaches steady state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) if (atomic_read(®->hr_steady_iterations) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) O2HB_MAX_WRITE_TIMEOUT_MS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) if (o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) cancel_delayed_work(®->hr_write_timeout_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) schedule_delayed_work(®->hr_write_timeout_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) msecs_to_jiffies(O2HB_MAX_WRITE_TIMEOUT_MS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) cancel_delayed_work(®->hr_nego_timeout_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) /* negotiate timeout must be less than write timeout. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) schedule_delayed_work(®->hr_nego_timeout_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) static void o2hb_disarm_timeout(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) cancel_delayed_work_sync(®->hr_write_timeout_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) cancel_delayed_work_sync(®->hr_nego_timeout_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) static int o2hb_send_nego_msg(int key, int type, u8 target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) struct o2hb_nego_msg msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) int status, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) msg.node_num = o2nm_this_node();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) ret = o2net_send_message(type, key, &msg, sizeof(msg),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) target, &status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) if (ret == -EAGAIN || ret == -ENOMEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) msleep(100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) static void o2hb_nego_timeout(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) int master_node, i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) struct o2hb_region *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) reg = container_of(work, struct o2hb_region, hr_nego_timeout_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) /* don't negotiate timeout if last hb failed since it is very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * possible io failed. Should let write timeout fence self.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) if (reg->hr_last_hb_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) /* lowest node as master node to make negotiate decision. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) if (master_node == o2nm_this_node()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) config_item_name(®->hr_item), reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) set_bit(master_node, reg->hr_nego_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) sizeof(reg->hr_nego_node_bitmap))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) /* check negotiate bitmap every second to do timeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) * approve decision.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) schedule_delayed_work(®->hr_nego_timeout_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) msecs_to_jiffies(1000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) config_item_name(®->hr_item), reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) /* approve negotiate timeout request. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) o2hb_arm_timeout(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) while ((i = find_next_bit(live_node_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) if (i == master_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) mlog(ML_HEARTBEAT, "send NEGO_APPROVE msg to node %d\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) ret = o2hb_send_nego_msg(reg->hr_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) O2HB_NEGO_APPROVE_MSG, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) mlog(ML_ERROR, "send NEGO_APPROVE msg to node %d fail %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) i, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) /* negotiate timeout with master node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(®->hr_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) reg->hr_dev_name, master_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) master_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) mlog(ML_ERROR, "send NEGO_TIMEOUT msg to node %d fail %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) master_node, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) struct o2hb_region *reg = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) struct o2hb_nego_msg *nego_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) nego_msg = (struct o2hb_nego_msg *)msg->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) nego_msg->node_num, config_item_name(®->hr_item), reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) if (nego_msg->node_num < O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) mlog(ML_ERROR, "got nego timeout message from bad node.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) void **ret_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) struct o2hb_region *reg = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) config_item_name(®->hr_item), reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) o2hb_arm_timeout(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) atomic_set(&wc->wc_num_reqs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) init_completion(&wc->wc_io_complete);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) wc->wc_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) /* Used in error paths too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) unsigned int num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) /* sadly atomic_sub_and_test() isn't available on all platforms. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) * good news is that the fast path only completes one at a time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) while(num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) if (atomic_dec_and_test(&wc->wc_num_reqs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) BUG_ON(num > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) complete(&wc->wc_io_complete);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) static void o2hb_wait_on_io(struct o2hb_bio_wait_ctxt *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) o2hb_bio_wait_dec(wc, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) wait_for_completion(&wc->wc_io_complete);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) static void o2hb_bio_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) if (bio->bi_status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) wc->wc_error = blk_status_to_errno(bio->bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) o2hb_bio_wait_dec(wc, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) /* Setup a Bio to cover I/O against num_slots slots starting at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) * start_slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) struct o2hb_bio_wait_ctxt *wc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) unsigned int *current_slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) unsigned int max_slots, int op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) int op_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) int len, current_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) unsigned int vec_len, vec_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) unsigned int bits = reg->hr_block_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) unsigned int spp = reg->hr_slots_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) unsigned int cs = *current_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) /* Testing has shown this allocation to take long enough under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) * GFP_KERNEL that the local node can get fenced. It would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * nicest if we could pre-allocate these bios and avoid this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) * all together. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) bio = bio_alloc(GFP_ATOMIC, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) if (!bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) mlog(ML_ERROR, "Could not alloc slots BIO!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) bio = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) /* Must put everything in 512 byte sectors for the bio... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) bio_set_dev(bio, reg->hr_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) bio->bi_private = wc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) bio->bi_end_io = o2hb_bio_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) bio_set_op_attrs(bio, op, op_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) vec_start = (cs << bits) % PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) while(cs < max_slots) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) current_page = cs / spp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) page = reg->hr_slot_data[current_page];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) vec_len = min(PAGE_SIZE - vec_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) (max_slots-cs) * (PAGE_SIZE/spp) );
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) current_page, vec_len, vec_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) len = bio_add_page(bio, page, vec_len, vec_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) if (len != vec_len) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) cs += vec_len / (PAGE_SIZE/spp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) vec_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) *current_slot = cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) return bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) static int o2hb_read_slots(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) unsigned int begin_slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) unsigned int max_slots)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) unsigned int current_slot = begin_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) struct o2hb_bio_wait_ctxt wc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) o2hb_bio_wait_init(&wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) while(current_slot < max_slots) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) bio = o2hb_setup_one_bio(reg, &wc, ¤t_slot, max_slots,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) REQ_OP_READ, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) if (IS_ERR(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) status = PTR_ERR(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) goto bail_and_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) atomic_inc(&wc.wc_num_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) bail_and_wait:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) o2hb_wait_on_io(&wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) if (wc.wc_error && !status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) status = wc.wc_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) static int o2hb_issue_node_write(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) struct o2hb_bio_wait_ctxt *write_wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) unsigned int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) o2hb_bio_wait_init(write_wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) slot = o2nm_this_node();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) REQ_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) if (IS_ERR(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) status = PTR_ERR(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) mlog_errno(status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) atomic_inc(&write_wc->wc_num_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) static u32 o2hb_compute_block_crc_le(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) struct o2hb_disk_heartbeat_block *hb_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) __le32 old_cksum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) u32 ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) /* We want to compute the block crc with a 0 value in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) * hb_cksum field. Save it off here and replace after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) * crc. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) old_cksum = hb_block->hb_cksum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) hb_block->hb_cksum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) ret = crc32_le(0, (unsigned char *) hb_block, reg->hr_block_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) hb_block->hb_cksum = old_cksum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) static void o2hb_dump_slot(struct o2hb_disk_heartbeat_block *hb_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) mlog(ML_ERROR, "Dump slot information: seq = 0x%llx, node = %u, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) "cksum = 0x%x, generation 0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) (long long)le64_to_cpu(hb_block->hb_seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) hb_block->hb_node, le32_to_cpu(hb_block->hb_cksum),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) (long long)le64_to_cpu(hb_block->hb_generation));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) static int o2hb_verify_crc(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) struct o2hb_disk_heartbeat_block *hb_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) u32 read, computed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) read = le32_to_cpu(hb_block->hb_cksum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) computed = o2hb_compute_block_crc_le(reg, hb_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) return read == computed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) * Compare the slot data with what we wrote in the last iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) * If the match fails, print an appropriate error message. This is to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) * detect errors like... another node hearting on the same slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) * flaky device that is losing writes, etc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) * Returns 1 if check succeeds, 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) static int o2hb_check_own_slot(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) struct o2hb_disk_slot *slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) struct o2hb_disk_heartbeat_block *hb_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) char *errstr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) slot = ®->hr_slots[o2nm_this_node()];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) /* Don't check on our 1st timestamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) if (!slot->ds_last_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) hb_block = slot->ds_raw_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) hb_block->hb_node == slot->ds_node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) #define ERRSTR1 "Another node is heartbeating on device"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) #define ERRSTR2 "Heartbeat generation mismatch on device"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) #define ERRSTR3 "Heartbeat sequence mismatch on device"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) if (hb_block->hb_node != slot->ds_node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) errstr = ERRSTR1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) else if (le64_to_cpu(hb_block->hb_generation) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) slot->ds_last_generation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) errstr = ERRSTR2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) errstr = ERRSTR3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) (unsigned long long)slot->ds_last_time, hb_block->hb_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) (unsigned long long)le64_to_cpu(hb_block->hb_generation),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) (unsigned long long)le64_to_cpu(hb_block->hb_seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) static inline void o2hb_prepare_block(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) u64 generation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) int node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) u64 cputime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) struct o2hb_disk_slot *slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) struct o2hb_disk_heartbeat_block *hb_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) node_num = o2nm_this_node();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) slot = ®->hr_slots[node_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) hb_block = (struct o2hb_disk_heartbeat_block *)slot->ds_raw_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) memset(hb_block, 0, reg->hr_block_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) /* TODO: time stuff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) cputime = ktime_get_real_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) if (!cputime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) cputime = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) hb_block->hb_seq = cpu_to_le64(cputime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) hb_block->hb_node = node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) hb_block->hb_generation = cpu_to_le64(generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) hb_block->hb_dead_ms = cpu_to_le32(o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) /* This step must always happen last! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) hb_block));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) mlog(ML_HB_BIO, "our node generation = 0x%llx, cksum = 0x%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) (long long)generation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) le32_to_cpu(hb_block->hb_cksum));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) struct o2nm_node *node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) int idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) struct o2hb_callback_func *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) list_for_each_entry(f, &hbcall->list, hc_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) mlog(ML_HEARTBEAT, "calling funcs %p\n", f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) (f->hc_func)(node, idx, f->hc_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) /* Will run the list in order until we process the passed event */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) static void o2hb_run_event_list(struct o2hb_node_event *queued_event)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) struct o2hb_callback *hbcall;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) struct o2hb_node_event *event;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) /* Holding callback sem assures we don't alter the callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * lists when doing this, and serializes ourselves with other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * processes wanting callbacks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) down_write(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) while (!list_empty(&o2hb_node_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) && !list_empty(&queued_event->hn_item)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) event = list_entry(o2hb_node_events.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) struct o2hb_node_event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) hn_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) list_del_init(&event->hn_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) mlog(ML_HEARTBEAT, "Node %s event for %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) event->hn_event_type == O2HB_NODE_UP_CB ? "UP" : "DOWN",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) event->hn_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) hbcall = hbcall_from_type(event->hn_event_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) /* We should *never* have gotten on to the list with a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) * bad type... This isn't something that we should try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * to recover from. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) BUG_ON(IS_ERR(hbcall));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) o2hb_fire_callbacks(hbcall, event->hn_node, event->hn_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) up_write(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) static void o2hb_queue_node_event(struct o2hb_node_event *event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) enum o2hb_callback_type type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) struct o2nm_node *node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) int node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) assert_spin_locked(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) event->hn_event_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) event->hn_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) event->hn_node_num = node_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) mlog(ML_HEARTBEAT, "Queue node %s event for node %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) type == O2HB_NODE_UP_CB ? "UP" : "DOWN", node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) list_add_tail(&event->hn_item, &o2hb_node_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) struct o2hb_node_event event =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) { .hn_item = LIST_HEAD_INIT(event.hn_item), };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) struct o2nm_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) int queued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) node = o2nm_get_node_by_num(slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) if (!node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) if (!list_empty(&slot->ds_live_item)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) mlog(ML_HEARTBEAT, "Shutdown, node %d leaves region\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) list_del_init(&slot->ds_live_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) clear_bit(slot->ds_node_num, o2hb_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) queued = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) if (queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) o2hb_run_event_list(&event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) o2nm_node_put(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) static void o2hb_set_quorum_device(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (!o2hb_global_heartbeat_active())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) /* Prevent race with o2hb_heartbeat_group_drop_item() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (kthread_should_stop())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) /* Tag region as quorum only after thread reaches steady state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (atomic_read(®->hr_steady_iterations) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) * A region can be added to the quorum only when it sees all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * live nodes heartbeat on it. In other words, the region has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * added to all nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) sizeof(o2hb_live_node_bitmap)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) config_item_name(®->hr_item), reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * If global heartbeat active, unpin all regions if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) * region count > CUT_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (bitmap_weight(o2hb_quorum_region_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) o2hb_region_unpin(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) static int o2hb_check_slot(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) struct o2hb_disk_slot *slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) int changed = 0, gen_changed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) struct o2hb_node_event event =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) { .hn_item = LIST_HEAD_INIT(event.hn_item), };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) struct o2nm_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) u64 cputime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) unsigned int slot_dead_ms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) int tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) int queued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) * If a node is no longer configured but is still in the livemap, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * may need to clear that bit from the livemap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) node = o2nm_get_node_by_num(slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) if (!node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (!tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (!o2hb_verify_crc(reg, hb_block)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) /* all paths from here will drop o2hb_live_lock for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * us. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) /* Don't print an error on the console in this case -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * a freshly formatted heartbeat area will not have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * crc set on it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) if (list_empty(&slot->ds_live_item))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) /* The node is live but pushed out a bad crc. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * consider it a transient miss but don't populate any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * other values as they may be junk. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) mlog(ML_ERROR, "Node %d has written a bad crc to %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) slot->ds_node_num, reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) o2hb_dump_slot(hb_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) slot->ds_equal_samples++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) goto fire_callbacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) /* we don't care if these wrap.. the state transitions below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) * clear at the right places */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) cputime = le64_to_cpu(hb_block->hb_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if (slot->ds_last_time != cputime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) slot->ds_changed_samples++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) slot->ds_equal_samples++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) slot->ds_last_time = cputime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) /* The node changed heartbeat generations. We assume this to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) * mean it dropped off but came back before we timed out. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) * want to consider it down for the time being but don't want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * to lose any changed_samples state we might build up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * considering it live again. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) if (slot->ds_last_generation != le64_to_cpu(hb_block->hb_generation)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) gen_changed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) slot->ds_equal_samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) mlog(ML_HEARTBEAT, "Node %d changed generation (0x%llx "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) "to 0x%llx)\n", slot->ds_node_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) (long long)slot->ds_last_generation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) (long long)le64_to_cpu(hb_block->hb_generation));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) slot->ds_last_generation = le64_to_cpu(hb_block->hb_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) mlog(ML_HEARTBEAT, "Slot %d gen 0x%llx cksum 0x%x "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) "seq %llu last %llu changed %u equal %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) slot->ds_node_num, (long long)slot->ds_last_generation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) le32_to_cpu(hb_block->hb_cksum),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) (unsigned long long)le64_to_cpu(hb_block->hb_seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) slot->ds_equal_samples);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) fire_callbacks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) /* dead nodes only come to life after some number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * changes at any time during their dead time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (list_empty(&slot->ds_live_item) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) slot->ds_changed_samples >= O2HB_LIVE_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) slot->ds_node_num, (long long)slot->ds_last_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) set_bit(slot->ds_node_num, reg->hr_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) /* first on the list generates a callback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) "bitmap\n", slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) set_bit(slot->ds_node_num, o2hb_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) changed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) queued = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) list_add_tail(&slot->ds_live_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) &o2hb_live_slots[slot->ds_node_num]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) slot->ds_equal_samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) /* We want to be sure that all nodes agree on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) * number of milliseconds before a node will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * considered dead. The self-fencing timeout is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * computed from this value, and a discrepancy might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) * result in heartbeat calling a node dead when it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * hasn't self-fenced yet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) if (slot_dead_ms && slot_dead_ms != dead_ms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) /* TODO: Perhaps we can fail the region here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) mlog(ML_ERROR, "Node %d on device %s has a dead count "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) "of %u ms, but our count is %u ms.\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) "Please double check your configuration values "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) "for 'O2CB_HEARTBEAT_THRESHOLD'\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) slot->ds_node_num, reg->hr_dev_name, slot_dead_ms,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) dead_ms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) /* if the list is dead, we're done.. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (list_empty(&slot->ds_live_item))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) /* live nodes only go dead after enough consequtive missed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * samples.. reset the missed counter whenever we see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * activity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (slot->ds_equal_samples >= o2hb_dead_threshold || gen_changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) mlog(ML_HEARTBEAT, "Node %d left my region\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) /* last off the live_slot generates a callback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) list_del_init(&slot->ds_live_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) "nodes bitmap\n", slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) clear_bit(slot->ds_node_num, o2hb_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) /* node can be null */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) node, slot->ds_node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) changed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) queued = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) /* We don't clear this because the node is still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * actually writing new blocks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) if (!gen_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) slot->ds_changed_samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if (slot->ds_changed_samples) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) slot->ds_changed_samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) slot->ds_equal_samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) if (queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) o2hb_run_event_list(&event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) if (node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) o2nm_node_put(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) return changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
/*
 * Return the position of the highest set bit among the first @numbits bits
 * of @nodes, as reported by find_last_bit().  The caller in this file treats
 * a result >= the bitmap size as "no bit set".
 */
static int o2hb_highest_node(unsigned long *nodes, int numbits)
{
	int highest = find_last_bit(nodes, numbits);

	return highest;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
/*
 * Return the position of the lowest set bit among the first @numbits bits
 * of @nodes, as reported by find_first_bit().  The caller in this file
 * treats a result >= the bitmap size as "no bit set".
 */
static int o2hb_lowest_node(unsigned long *nodes, int numbits)
{
	int lowest = find_first_bit(nodes, numbits);

	return lowest;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) int i, ret, highest_node, lowest_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) int membership_change = 0, own_slot_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) struct o2hb_bio_wait_ctxt write_wc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) ret = o2nm_configured_node_map(configured_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) sizeof(configured_nodes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * If a node is not configured but is in the livemap, we still need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * to read the slot so as to be able to remove it from the livemap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) while ((i = find_next_bit(live_node_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) set_bit(i, configured_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) lowest_node = o2hb_lowest_node(configured_nodes, O2NM_MAX_NODES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) if (highest_node >= O2NM_MAX_NODES || lowest_node >= O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) mlog(ML_NOTICE, "o2hb: No configured nodes found!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) /* No sense in reading the slots of nodes that don't exist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) * yet. Of course, if the node definitions have holes in them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) * then we're reading an empty slot anyway... Consider this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) * best-effort. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) ret = o2hb_read_slots(reg, lowest_node, highest_node + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) /* With an up to date view of the slots, we can check that no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) * other node has been improperly configured to heartbeat in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) * our slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) own_slot_ok = o2hb_check_own_slot(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) /* fill in the proper info for our next heartbeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) o2hb_prepare_block(reg, reg->hr_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) ret = o2hb_issue_node_write(reg, &write_wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) while((i = find_next_bit(configured_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) membership_change |= o2hb_check_slot(reg, ®->hr_slots[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * We have to be sure we've advertised ourselves on disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) * before we can go to steady state. This ensures that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) * people we find in our steady state have seen us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) o2hb_wait_on_io(&write_wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) if (write_wc.wc_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) /* Do not re-arm the write timeout on I/O error - we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) * can't be sure that the new block ever made it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * disk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) write_wc.wc_error, reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) ret = write_wc.wc_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) /* Skip disarming the timeout if own slot has stale/bad data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (own_slot_ok) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) o2hb_set_quorum_device(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) o2hb_arm_timeout(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) reg->hr_last_timeout_start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /* let the person who launched us know when things are steady */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (atomic_read(®->hr_steady_iterations) != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (!ret && own_slot_ok && !membership_change) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) if (atomic_dec_and_test(®->hr_steady_iterations))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) wake_up(&o2hb_steady_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (atomic_read(®->hr_steady_iterations) != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (atomic_dec_and_test(®->hr_unsteady_iterations)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) printk(KERN_NOTICE "o2hb: Unable to stabilize "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) "heartbeat on region %s (%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) config_item_name(®->hr_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) atomic_set(®->hr_steady_iterations, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) reg->hr_aborted_start = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) wake_up(&o2hb_steady_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) * we ride the region ref that the region dir holds. before the region
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * dir is removed and drops it ref it will wait to tear down this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) static int o2hb_thread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) struct o2hb_region *reg = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) struct o2hb_bio_wait_ctxt write_wc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) ktime_t before_hb, after_hb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) unsigned int elapsed_msec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread running\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) set_user_nice(current, MIN_NICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) /* Pin node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) ret = o2nm_depend_this_node();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) mlog(ML_ERROR, "Node has been deleted, ret = %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) reg->hr_node_deleted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) wake_up(&o2hb_steady_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) while (!kthread_should_stop() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) !reg->hr_unclean_stop && !reg->hr_aborted_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) /* We track the time spent inside
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * o2hb_do_disk_heartbeat so that we avoid more than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * hr_timeout_ms between disk writes. On busy systems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) * this should result in a heartbeat which is less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) * likely to time itself out. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) before_hb = ktime_get_real();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) ret = o2hb_do_disk_heartbeat(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) reg->hr_last_hb_status = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) after_hb = ktime_get_real();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) elapsed_msec = (unsigned int)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) ktime_ms_delta(after_hb, before_hb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) mlog(ML_HEARTBEAT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) "start = %lld, end = %lld, msec = %u, ret = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) before_hb, after_hb, elapsed_msec, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) if (!kthread_should_stop() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) elapsed_msec < reg->hr_timeout_ms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) /* the kthread api has blocked signals for us so no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) * need to record the return value. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) msleep_interruptible(reg->hr_timeout_ms - elapsed_msec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) o2hb_disarm_timeout(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) /* unclean stop is only used in very bad situation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) for(i = 0; !reg->hr_unclean_stop && i < reg->hr_blocks; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) o2hb_shutdown_slot(®->hr_slots[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) /* Explicit down notification - avoid forcing the other nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) * to timeout on this region when we could just as easily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) * write a clear generation - thus indicating to them that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) * this node has left this region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) if (!reg->hr_unclean_stop && !reg->hr_aborted_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) o2hb_prepare_block(reg, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) ret = o2hb_issue_node_write(reg, &write_wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) o2hb_wait_on_io(&write_wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /* Unpin node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) o2nm_undepend_this_node();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) mlog(ML_HEARTBEAT|ML_KTHREAD, "o2hb thread exiting\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) #ifdef CONFIG_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) static int o2hb_debug_open(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct o2hb_debug_buf *db = inode->i_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) struct o2hb_region *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) unsigned long lts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) char *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) int i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) int out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) /* max_nodes should be the largest bitmap we pass here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) BUG_ON(sizeof(map) < db->db_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) goto bail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) switch (db->db_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) case O2HB_DB_TYPE_LIVENODES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) case O2HB_DB_TYPE_LIVEREGIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) case O2HB_DB_TYPE_QUORUMREGIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) case O2HB_DB_TYPE_FAILEDREGIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) memcpy(map, db->db_data, db->db_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) case O2HB_DB_TYPE_REGION_LIVENODES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) reg = (struct o2hb_region *)db->db_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) memcpy(map, reg->hr_live_node_bitmap, db->db_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) case O2HB_DB_TYPE_REGION_NUMBER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) reg = (struct o2hb_region *)db->db_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) out += scnprintf(buf + out, PAGE_SIZE - out, "%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) reg->hr_region_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) case O2HB_DB_TYPE_REGION_ELAPSED_TIME:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) reg = (struct o2hb_region *)db->db_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) lts = reg->hr_last_timeout_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) /* If 0, it has never been set before */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) if (lts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) lts = jiffies_to_msecs(jiffies - lts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) out += scnprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) case O2HB_DB_TYPE_REGION_PINNED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) reg = (struct o2hb_region *)db->db_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) out += scnprintf(buf + out, PAGE_SIZE - out, "%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) !!reg->hr_item_pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) out += scnprintf(buf + out, PAGE_SIZE - out, "\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) i_size_write(inode, out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) file->private_data = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) bail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) static int o2hb_debug_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) kfree(file->private_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) size_t nbytes, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) i_size_read(file->f_mapping->host));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) #else
/* Stub used when CONFIG_DEBUG_FS is disabled: nothing to set up. */
static int o2hb_debug_open(struct inode *inode, struct file *file)
{
	return 0;
}
/* Stub used when CONFIG_DEBUG_FS is disabled: nothing to tear down. */
static int o2hb_debug_release(struct inode *inode, struct file *file)
{
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) size_t nbytes, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) #endif /* CONFIG_DEBUG_FS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) static const struct file_operations o2hb_debug_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) .open = o2hb_debug_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) .release = o2hb_debug_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) .read = o2hb_debug_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) .llseek = generic_file_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) void o2hb_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) debugfs_remove_recursive(o2hb_debug_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) kfree(o2hb_db_livenodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) kfree(o2hb_db_liveregions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) kfree(o2hb_db_quorumregions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) kfree(o2hb_db_failedregions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) static void o2hb_debug_create(const char *name, struct dentry *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) struct o2hb_debug_buf **db, int db_len, int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) int size, int len, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) *db = kmalloc(db_len, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (!*db)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) (*db)->db_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) (*db)->db_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) (*db)->db_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) (*db)->db_data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, &o2hb_debug_fops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) static void o2hb_debug_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) o2hb_debug_create(O2HB_DEBUG_LIVENODES, o2hb_debug_dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) &o2hb_db_livenodes, sizeof(*o2hb_db_livenodes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) O2HB_DB_TYPE_LIVENODES, sizeof(o2hb_live_node_bitmap),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) O2NM_MAX_NODES, o2hb_live_node_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, o2hb_debug_dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) &o2hb_db_liveregions, sizeof(*o2hb_db_liveregions),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) O2HB_DB_TYPE_LIVEREGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) o2hb_live_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, o2hb_debug_dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) &o2hb_db_quorumregions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) sizeof(*o2hb_db_quorumregions),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) O2HB_DB_TYPE_QUORUMREGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) o2hb_quorum_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, o2hb_debug_dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) &o2hb_db_failedregions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) sizeof(*o2hb_db_failedregions),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) O2HB_DB_TYPE_FAILEDREGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) o2hb_failed_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) void o2hb_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) for (i = 0; i < ARRAY_SIZE(o2hb_callbacks); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) INIT_LIST_HEAD(&o2hb_callbacks[i].list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) for (i = 0; i < ARRAY_SIZE(o2hb_live_slots); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) INIT_LIST_HEAD(&o2hb_live_slots[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) INIT_LIST_HEAD(&o2hb_node_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) o2hb_dependent_users = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) o2hb_debug_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) /* if we're already in a callback then we're already serialized by the sem */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) static void o2hb_fill_node_map_from_callback(unsigned long *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) unsigned bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) memcpy(map, &o2hb_live_node_bitmap, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * get a map of all nodes that are heartbeating in any regions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) void o2hb_fill_node_map(unsigned long *map, unsigned bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) /* callers want to serialize this map and callbacks so that they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) * can trust that they don't miss nodes coming to the party */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) down_read(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) o2hb_fill_node_map_from_callback(map, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) up_read(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) EXPORT_SYMBOL_GPL(o2hb_fill_node_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * heartbeat configfs bits. The heartbeat set is a default set under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) * the cluster set in nodemanager.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) static struct o2hb_region *to_o2hb_region(struct config_item *item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) return item ? container_of(item, struct o2hb_region, hr_item) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) /* drop_item only drops its ref after killing the thread, nothing should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * be using the region anymore. this has to clean up any state that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * attributes might have built up. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) static void o2hb_region_release(struct config_item *item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) kfree(reg->hr_tmp_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (reg->hr_slot_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) for (i = 0; i < reg->hr_num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) page = reg->hr_slot_data[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) __free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) kfree(reg->hr_slot_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) if (reg->hr_bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) kfree(reg->hr_slots);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) debugfs_remove_recursive(reg->hr_debug_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) kfree(reg->hr_db_livenodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) kfree(reg->hr_db_regnum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) kfree(reg->hr_db_elapsed_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) kfree(reg->hr_db_pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) list_del(®->hr_all_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) o2net_unregister_handler_list(®->hr_handler_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) kfree(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) static int o2hb_read_block_input(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) const char *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) unsigned long *ret_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) unsigned int *ret_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) unsigned long bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) char *p = (char *)page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) bytes = simple_strtoul(p, &p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) if (!p || (*p && (*p != '\n')))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) /* Heartbeat and fs min / max block sizes are the same. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) if (bytes > 4096 || bytes < 512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) if (hweight16(bytes) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) if (ret_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) *ret_bytes = bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (ret_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) *ret_bits = ffs(bytes) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) static ssize_t o2hb_region_block_bytes_show(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) return sprintf(page, "%u\n", to_o2hb_region(item)->hr_block_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) static ssize_t o2hb_region_block_bytes_store(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) const char *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) unsigned long block_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) unsigned int block_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) if (reg->hr_bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) status = o2hb_read_block_input(reg, page, &block_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) &block_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) if (status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) reg->hr_block_bytes = (unsigned int)block_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) reg->hr_block_bits = block_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) static ssize_t o2hb_region_start_block_show(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) return sprintf(page, "%llu\n", to_o2hb_region(item)->hr_start_block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) static ssize_t o2hb_region_start_block_store(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) const char *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) unsigned long long tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) char *p = (char *)page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (reg->hr_bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) tmp = simple_strtoull(p, &p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (!p || (*p && (*p != '\n')))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) reg->hr_start_block = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) static ssize_t o2hb_region_blocks_show(struct config_item *item, char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) return sprintf(page, "%d\n", to_o2hb_region(item)->hr_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) static ssize_t o2hb_region_blocks_store(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) const char *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) unsigned long tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) char *p = (char *)page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) if (reg->hr_bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) tmp = simple_strtoul(p, &p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) if (!p || (*p && (*p != '\n')))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (tmp > O2NM_MAX_NODES || tmp == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) reg->hr_blocks = (unsigned int)tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) static ssize_t o2hb_region_dev_show(struct config_item *item, char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) unsigned int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) if (to_o2hb_region(item)->hr_bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) ret = sprintf(page, "%s\n", to_o2hb_region(item)->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) static void o2hb_init_region_params(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) reg->hr_slots_per_page = PAGE_SIZE >> reg->hr_block_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) reg->hr_timeout_ms = O2HB_REGION_TIMEOUT_MS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) mlog(ML_HEARTBEAT, "hr_start_block = %llu, hr_blocks = %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) reg->hr_start_block, reg->hr_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) mlog(ML_HEARTBEAT, "hr_block_bytes = %u, hr_block_bits = %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) reg->hr_block_bytes, reg->hr_block_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) mlog(ML_HEARTBEAT, "hr_timeout_ms = %u\n", reg->hr_timeout_ms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) mlog(ML_HEARTBEAT, "dead threshold = %u\n", o2hb_dead_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) static int o2hb_map_slot_data(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) unsigned int last_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) unsigned int spp = reg->hr_slots_per_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) char *raw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) struct o2hb_disk_slot *slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) reg->hr_tmp_block = kmalloc(reg->hr_block_bytes, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) if (reg->hr_tmp_block == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) reg->hr_slots = kcalloc(reg->hr_blocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) sizeof(struct o2hb_disk_slot), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) if (reg->hr_slots == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) for(i = 0; i < reg->hr_blocks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) slot = ®->hr_slots[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) slot->ds_node_num = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) INIT_LIST_HEAD(&slot->ds_live_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) slot->ds_raw_block = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) reg->hr_num_pages = (reg->hr_blocks + spp - 1) / spp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) mlog(ML_HEARTBEAT, "Going to require %u pages to cover %u blocks "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) "at %u blocks per page\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) reg->hr_num_pages, reg->hr_blocks, spp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) reg->hr_slot_data = kcalloc(reg->hr_num_pages, sizeof(struct page *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) if (!reg->hr_slot_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) for(i = 0; i < reg->hr_num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) reg->hr_slot_data[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) last_slot = i * spp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) raw = page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) for (j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) (j < spp) && ((j + last_slot) < reg->hr_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) BUG_ON((j + last_slot) >= reg->hr_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) slot = ®->hr_slots[j + last_slot];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) slot->ds_raw_block =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) (struct o2hb_disk_heartbeat_block *) raw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) raw += reg->hr_block_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) /* Read in all the slots available and populate the tracking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) * structures so that we can start with a baseline idea of what's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) * there. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) static int o2hb_populate_slot_data(struct o2hb_region *reg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) int ret, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) struct o2hb_disk_slot *slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) struct o2hb_disk_heartbeat_block *hb_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) ret = o2hb_read_slots(reg, 0, reg->hr_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) /* We only want to get an idea of the values initially in each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * slot, so we do no verification - o2hb_check_slot will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * actually determine if each configured slot is valid and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) * whether any values have changed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) for(i = 0; i < reg->hr_blocks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) slot = ®->hr_slots[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) hb_block = (struct o2hb_disk_heartbeat_block *) slot->ds_raw_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) /* Only fill the values that o2hb_check_slot uses to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * determine changing slots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) slot->ds_last_time = le64_to_cpu(hb_block->hb_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) slot->ds_last_generation = le64_to_cpu(hb_block->hb_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) /* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) static ssize_t o2hb_region_dev_store(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) const char *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) struct task_struct *hb_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) long fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) int sectsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) char *p = (char *)page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) ssize_t ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) int live_threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) if (reg->hr_bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) /* We can't heartbeat without having had our node number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) * configured yet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) if (o2nm_this_node() == O2NM_MAX_NODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) fd = simple_strtol(p, &p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) if (!p || (*p && (*p != '\n')))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) if (fd < 0 || fd >= INT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) f = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) if (f.file == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) reg->hr_block_bytes == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) goto out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) if (!S_ISBLK(f.file->f_mapping->host->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) goto out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) reg->hr_bdev = blkdev_get_by_dev(f.file->f_mapping->host->i_rdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) FMODE_WRITE | FMODE_READ, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) if (IS_ERR(reg->hr_bdev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) ret = PTR_ERR(reg->hr_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) reg->hr_bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) goto out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) bdevname(reg->hr_bdev, reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) sectsize = bdev_logical_block_size(reg->hr_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) if (sectsize != reg->hr_block_bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) mlog(ML_ERROR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) "blocksize %u incorrect for device, expected %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) reg->hr_block_bytes, sectsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) goto out3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) o2hb_init_region_params(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) /* Generation of zero is invalid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) get_random_bytes(®->hr_generation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) sizeof(reg->hr_generation));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) } while (reg->hr_generation == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) ret = o2hb_map_slot_data(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) goto out3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) ret = o2hb_populate_slot_data(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) goto out3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) INIT_DELAYED_WORK(®->hr_nego_timeout_work, o2hb_nego_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * A node is considered live after it has beat LIVE_THRESHOLD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * times. We're not steady until we've given them a chance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) * _after_ our first read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * The default threshold is bare minimum so as to limit the delay
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * during mounts. For global heartbeat, the threshold doubled for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * first region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) live_threshold = O2HB_LIVE_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) if (o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) if (bitmap_weight(o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) live_threshold <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) ++live_threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) atomic_set(®->hr_steady_iterations, live_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) /* unsteady_iterations is triple the steady_iterations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) atomic_set(®->hr_unsteady_iterations, (live_threshold * 3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) reg->hr_item.ci_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) if (IS_ERR(hb_task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) ret = PTR_ERR(hb_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) goto out3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) reg->hr_task = hb_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) ret = wait_event_interruptible(o2hb_steady_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) atomic_read(®->hr_steady_iterations) == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) reg->hr_node_deleted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) atomic_set(®->hr_steady_iterations, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) reg->hr_aborted_start = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) if (reg->hr_aborted_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) goto out3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) if (reg->hr_node_deleted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) goto out3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) /* Ok, we were woken. Make sure it wasn't by drop_item() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) hb_task = reg->hr_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) if (o2hb_global_heartbeat_active())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) set_bit(reg->hr_region_num, o2hb_live_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) if (hb_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) ret = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) if (hb_task && o2hb_global_heartbeat_active())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) config_item_name(®->hr_item), reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) out3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) blkdev_put(reg->hr_bdev, FMODE_READ | FMODE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) reg->hr_bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) out2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) static ssize_t o2hb_region_pid_show(struct config_item *item, char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) pid_t pid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (reg->hr_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) pid = task_pid_nr(reg->hr_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) if (!pid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) return sprintf(page, "%u\n", pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)
/*
 * configfs attributes of a heartbeat region.  Each invocation below is
 * expected to define the o2hb_region_attr_<name> object referenced in
 * o2hb_region_attrs[]; "pid" uses the read-only variant of the macro.
 */
CONFIGFS_ATTR(o2hb_region_, block_bytes);
CONFIGFS_ATTR(o2hb_region_, start_block);
CONFIGFS_ATTR(o2hb_region_, blocks);
CONFIGFS_ATTR(o2hb_region_, dev);
CONFIGFS_ATTR_RO(o2hb_region_, pid);

/* NULL-terminated list of the attributes exposed by every region item. */
static struct configfs_attribute *o2hb_region_attrs[] = {
	&o2hb_region_attr_block_bytes,
	&o2hb_region_attr_start_block,
	&o2hb_region_attr_blocks,
	&o2hb_region_attr_dev,
	&o2hb_region_attr_pid,
	NULL,
};

/* Item callbacks for a region: only ->release is provided. */
static struct configfs_item_operations o2hb_region_item_ops = {
	.release		= o2hb_region_release,
};

/* Item type used when a region item is initialised in make_item(). */
static const struct config_item_type o2hb_region_type = {
	.ct_item_ops	= &o2hb_region_item_ops,
	.ct_attrs	= o2hb_region_attrs,
	.ct_owner	= THIS_MODULE,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957)
/* heartbeat set */

/*
 * Container for the "heartbeat" configfs group.  It currently carries
 * nothing besides the embedded config_group, which
 * to_o2hb_heartbeat_group() uses to get back to this structure.
 */
struct o2hb_heartbeat_group {
	struct config_group hs_group;
	/* some stuff? */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group *group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) return group ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) container_of(group, struct o2hb_heartbeat_group, hs_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) static void o2hb_debug_region_init(struct o2hb_region *reg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) struct dentry *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) struct dentry *dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) dir = debugfs_create_dir(config_item_name(®->hr_item), parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) reg->hr_debug_dir = dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) o2hb_debug_create(O2HB_DEBUG_LIVENODES, dir, &(reg->hr_db_livenodes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) sizeof(*(reg->hr_db_livenodes)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) O2HB_DB_TYPE_REGION_LIVENODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, dir, &(reg->hr_db_regnum),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) sizeof(*(reg->hr_db_regnum)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) &(reg->hr_db_elapsed_time),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) sizeof(*(reg->hr_db_elapsed_time)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, dir, &(reg->hr_db_pinned),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) sizeof(*(reg->hr_db_pinned)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) struct o2hb_region *reg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) if (reg == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) ret = -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) goto free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) reg->hr_region_num = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) if (o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) O2NM_MAX_REGIONS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) ret = -EFBIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) goto free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) set_bit(reg->hr_region_num, o2hb_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) list_add_tail(®->hr_all_item, &o2hb_all_regions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) config_item_init_type_name(®->hr_item, name, &o2hb_region_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) /* this is the same way to generate msg key as dlm, for local heartbeat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) * name is also the same, so make initial crc value different to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) * message key conflict.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) reg->hr_key = crc32_le(reg->hr_region_num + O2NM_MAX_REGIONS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) name, strlen(name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) INIT_LIST_HEAD(®->hr_handler_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) ret = o2net_register_handler(O2HB_NEGO_TIMEOUT_MSG, reg->hr_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) sizeof(struct o2hb_nego_msg),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) o2hb_nego_timeout_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) reg, NULL, ®->hr_handler_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) goto remove_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) sizeof(struct o2hb_nego_msg),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) o2hb_nego_approve_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) reg, NULL, ®->hr_handler_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) goto unregister_handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) o2hb_debug_region_init(reg, o2hb_debug_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) return ®->hr_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) unregister_handler:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) o2net_unregister_handler_list(®->hr_handler_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) remove_item:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) list_del(®->hr_all_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) if (o2hb_global_heartbeat_active())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) clear_bit(reg->hr_region_num, o2hb_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) kfree(reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) static void o2hb_heartbeat_group_drop_item(struct config_group *group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) struct config_item *item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) struct task_struct *hb_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) struct o2hb_region *reg = to_o2hb_region(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) int quorum_region = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) /* stop the thread when the user removes the region dir */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) hb_task = reg->hr_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) reg->hr_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) reg->hr_item_dropped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) if (hb_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) kthread_stop(hb_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) if (o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) clear_bit(reg->hr_region_num, o2hb_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) quorum_region = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) ((atomic_read(®->hr_steady_iterations) == 0) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) "stopped" : "start aborted"), config_item_name(item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) reg->hr_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) * If we're racing a dev_write(), we need to wake them. They will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) * check reg->hr_task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (atomic_read(®->hr_steady_iterations) != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) reg->hr_aborted_start = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) atomic_set(®->hr_steady_iterations, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) wake_up(&o2hb_steady_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) config_item_put(item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) if (!o2hb_global_heartbeat_active() || !quorum_region)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) * If global heartbeat active and there are dependent users,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) * pin all regions if quorum region count <= CUT_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) if (!o2hb_dependent_users)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) if (bitmap_weight(o2hb_quorum_region_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) o2hb_region_pin(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) static ssize_t o2hb_heartbeat_group_dead_threshold_show(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) return sprintf(page, "%u\n", o2hb_dead_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) const char *page, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) unsigned long tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) char *p = (char *)page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) tmp = simple_strtoul(p, &p, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) if (!p || (*p && (*p != '\n')))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) /* this will validate ranges for us. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) o2hb_dead_threshold_set((unsigned int) tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) static ssize_t o2hb_heartbeat_group_mode_show(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) return sprintf(page, "%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) const char *page, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) len = (page[count - 1] == '\n') ? count - 1 : count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) if (strncasecmp(page, o2hb_heartbeat_mode_desc[i], len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) ret = o2hb_global_heartbeat_mode_set(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) o2hb_heartbeat_mode_desc[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188)
/*
 * configfs attributes of the heartbeat group itself.  Each invocation is
 * expected to define the o2hb_heartbeat_group_attr_<name> object
 * referenced in o2hb_heartbeat_group_attrs[].
 */
CONFIGFS_ATTR(o2hb_heartbeat_group_, dead_threshold);
CONFIGFS_ATTR(o2hb_heartbeat_group_, mode);

/* NULL-terminated list of the attributes exposed by the heartbeat group. */
static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
	&o2hb_heartbeat_group_attr_dead_threshold,
	&o2hb_heartbeat_group_attr_mode,
	NULL,
};

/* Group callbacks: regions are created and removed through these. */
static struct configfs_group_operations o2hb_heartbeat_group_group_ops = {
	.make_item	= o2hb_heartbeat_group_make_item,
	.drop_item	= o2hb_heartbeat_group_drop_item,
};

/* Item type of the "heartbeat" group set up by o2hb_alloc_hb_set(). */
static const struct config_item_type o2hb_heartbeat_group_type = {
	.ct_group_ops	= &o2hb_heartbeat_group_group_ops,
	.ct_attrs	= o2hb_heartbeat_group_attrs,
	.ct_owner	= THIS_MODULE,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) /* this is just here to avoid touching group in heartbeat.h which the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) * entire damn world #includes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) struct config_group *o2hb_alloc_hb_set(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) struct o2hb_heartbeat_group *hs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) struct config_group *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) hs = kzalloc(sizeof(struct o2hb_heartbeat_group), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) if (hs == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) config_group_init_type_name(&hs->hs_group, "heartbeat",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) &o2hb_heartbeat_group_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) ret = &hs->hs_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) if (ret == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) kfree(hs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
/*
 * Free the heartbeat group that embeds @group, i.e. the object handed out
 * by o2hb_alloc_hb_set().
 */
void o2hb_free_hb_set(struct config_group *group)
{
	kfree(to_o2hb_heartbeat_group(group));
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) /* hb callback registration and issuing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) if (type == O2HB_NUM_CB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) return &o2hb_callbacks[type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) void o2hb_setup_callback(struct o2hb_callback_func *hc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) enum o2hb_callback_type type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) o2hb_cb_func *func,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) int priority)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) INIT_LIST_HEAD(&hc->hc_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) hc->hc_func = func;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) hc->hc_data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) hc->hc_priority = priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) hc->hc_type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) hc->hc_magic = O2HB_CB_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) EXPORT_SYMBOL_GPL(o2hb_setup_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) * In local heartbeat mode, region_uuid passed matches the dlm domain name.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) * In global heartbeat mode, region_uuid passed is NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * In local, we only pin the matching region. In global we pin all the active
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * regions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) static int o2hb_region_pin(const char *region_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) int ret = 0, found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) struct o2hb_region *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) char *uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) assert_spin_locked(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) if (reg->hr_item_dropped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) uuid = config_item_name(®->hr_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) /* local heartbeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (region_uuid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) if (strcmp(region_uuid, uuid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) found = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) if (reg->hr_item_pinned || reg->hr_item_dropped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) goto skip_pin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) /* Ignore ENOENT only for local hb (userdlm domain) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) ret = o2nm_depend_item(®->hr_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) mlog(ML_CLUSTER, "Pin region %s\n", uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) reg->hr_item_pinned = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) if (ret == -ENOENT && found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) mlog(ML_ERROR, "Pin region %s fails with %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) uuid, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) skip_pin:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) if (found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) * In local heartbeat mode, region_uuid passed matches the dlm domain name.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) * In global heartbeat mode, region_uuid passed is NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) * In local, we only unpin the matching region. In global we unpin all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) * active regions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) static void o2hb_region_unpin(const char *region_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) struct o2hb_region *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) char *uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) int found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) assert_spin_locked(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) if (reg->hr_item_dropped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) uuid = config_item_name(®->hr_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) if (region_uuid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) if (strcmp(region_uuid, uuid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) found = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) if (reg->hr_item_pinned) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) o2nm_undepend_item(®->hr_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) reg->hr_item_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) if (found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) static int o2hb_region_inc_user(const char *region_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) /* local heartbeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) if (!o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) ret = o2hb_region_pin(region_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) * if global heartbeat active and this is the first dependent user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) * pin all regions if quorum region count <= CUT_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) o2hb_dependent_users++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) if (o2hb_dependent_users > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) if (bitmap_weight(o2hb_quorum_region_bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) ret = o2hb_region_pin(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) static void o2hb_region_dec_user(const char *region_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) /* local heartbeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) if (!o2hb_global_heartbeat_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) o2hb_region_unpin(region_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) * if global heartbeat active and there are no dependent users,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) * unpin all quorum regions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) o2hb_dependent_users--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) if (!o2hb_dependent_users)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) o2hb_region_unpin(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) int o2hb_register_callback(const char *region_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) struct o2hb_callback_func *hc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) struct o2hb_callback_func *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) struct o2hb_callback *hbcall;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) BUG_ON(!list_empty(&hc->hc_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) hbcall = hbcall_from_type(hc->hc_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (IS_ERR(hbcall)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) ret = PTR_ERR(hbcall);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) if (region_uuid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) ret = o2hb_region_inc_user(region_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) mlog_errno(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) down_write(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) list_for_each_entry(f, &hbcall->list, hc_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (hc->hc_priority < f->hc_priority) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) list_add_tail(&hc->hc_item, &f->hc_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) if (list_empty(&hc->hc_item))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) list_add_tail(&hc->hc_item, &hbcall->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) up_write(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) ret, __builtin_return_address(0), hc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) EXPORT_SYMBOL_GPL(o2hb_register_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) void o2hb_unregister_callback(const char *region_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) struct o2hb_callback_func *hc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) __builtin_return_address(0), hc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) /* XXX Can this happen _with_ a region reference? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) if (list_empty(&hc->hc_item))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) if (region_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) o2hb_region_dec_user(region_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) down_write(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) list_del_init(&hc->hc_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) up_write(&o2hb_callback_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) EXPORT_SYMBOL_GPL(o2hb_unregister_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) int o2hb_check_node_heartbeating_no_sem(u8 node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) if (!test_bit(node_num, testing_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) mlog(ML_HEARTBEAT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) "node (%u) does not have heartbeating enabled.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_no_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) int o2hb_check_node_heartbeating_from_callback(u8 node_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) if (!test_bit(node_num, testing_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) mlog(ML_HEARTBEAT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) "node (%u) does not have heartbeating enabled.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) node_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_from_callback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) * this is just a hack until we get the plumbing which flips file systems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) * read only and drops the hb ref instead of killing the node dead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) void o2hb_stop_all_regions(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) struct o2hb_region *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) mlog(ML_ERROR, "stopping heartbeat on all active regions.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) list_for_each_entry(reg, &o2hb_all_regions, hr_all_item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) reg->hr_unclean_stop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) EXPORT_SYMBOL_GPL(o2hb_stop_all_regions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) int o2hb_get_all_regions(char *region_uuids, u8 max_regions)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) struct o2hb_region *reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) int numregs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) char *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) spin_lock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) p = region_uuids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) if (reg->hr_item_dropped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) mlog(0, "Region: %s\n", config_item_name(®->hr_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) if (numregs < max_regions) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) memcpy(p, config_item_name(®->hr_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) O2HB_MAX_REGION_NAME_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) p += O2HB_MAX_REGION_NAME_LEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) numregs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) spin_unlock(&o2hb_live_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) return numregs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) EXPORT_SYMBOL_GPL(o2hb_get_all_regions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) int o2hb_global_heartbeat_active(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) EXPORT_SYMBOL(o2hb_global_heartbeat_active);