Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /* -*- mode: c; c-basic-offset: 8; -*-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * vim: noexpandtab sw=8 ts=8 sts=0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * Copyright (C) 2005 Oracle.  All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) /* This quorum hack is only here until we transition to some more rational
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  * approach that is driven from userspace.  Honest.  No foolin'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  * Imagine two nodes lose network connectivity to each other but they're still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  * up and operating in every other way.  Presumably a network timeout indicates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  * that a node is broken and should be recovered.  They can't both recover each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  * other and both carry on without serialising their access to the file system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16)  * They need to decide who is authoritative.  Now extend that problem to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17)  * arbitrary groups of nodes losing connectivity between each other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19)  * So we declare that a node which has given up on connecting to a majority
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20)  * of nodes who are still heartbeating will fence itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22)  * There are huge opportunities for races here.  After we give up on a node's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23)  * connection we need to wait long enough to give heartbeat an opportunity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24)  * to declare the node as truly dead.  We also need to be careful with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25)  * race between when we see a node start heartbeating and when we connect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26)  * to it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27)  *
 * So nodes that are in this transition put a hold on the quorum decision
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29)  * with a counter.  As they fall out of this transition they drop the count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30)  * and if they're the last, they fire off the decision.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) #include <linux/workqueue.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) #include <linux/reboot.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) #include "heartbeat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) #include "nodemanager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) #define MLOG_MASK_PREFIX ML_QUORUM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) #include "masklog.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) #include "quorum.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 
/*
 * Global quorum state, protected by qs_lock.  The three bitmaps record,
 * indexed by node number: which nodes are currently heartbeating, which
 * nodes we hold an open network connection to, and which nodes currently
 * hold off the quorum decision while their connect/heartbeat state
 * settles.
 */
static struct o2quo_state {
	spinlock_t		qs_lock;
	struct work_struct	qs_work;	/* runs o2quo_make_decision() */
	int			qs_pending;	/* a decision waits for holds to drain */
	int			qs_heartbeating; /* number of bits set in qs_hb_bm */
	unsigned long		qs_hb_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
	int			qs_connected;	/* number of bits set in qs_conn_bm */
	unsigned long		qs_conn_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
	int			qs_holds;	/* number of bits set in qs_hold_bm */
	unsigned long		qs_hold_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
} o2quo_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) /* this is horribly heavy-handed.  It should instead flip the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55)  * system RO and call some userspace script. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) static void o2quo_fence_self(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	/* panic spins with interrupts enabled.  with preempt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	 * threads can still schedule, etc, etc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 	o2hb_stop_all_regions();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	switch (o2nm_single_cluster->cl_fence_method) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	case O2NM_FENCE_PANIC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 		panic("*** ocfs2 is very sorry to be fencing this system by "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 		      "panicing ***\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 		WARN_ON(o2nm_single_cluster->cl_fence_method >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 			O2NM_FENCE_METHODS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	case O2NM_FENCE_RESET:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 		printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 		       "system by restarting ***\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 		emergency_restart();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) /* Indicate that a timeout occurred on a heartbeat region write. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80)  * other nodes in the cluster may consider us dead at that time so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81)  * want to "fence" ourselves so that we don't scribble on the disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82)  * after they think they've recovered us. This can't solve all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83)  * problems related to writeout after recovery but this hack can at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84)  * least close some of those gaps. When we have real fencing, this can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85)  * go away as our node would be fenced externally before other nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86)  * begin recovery. */
void o2quo_disk_timeout(void)
{
	/* a heartbeat region write timed out; other nodes may already
	 * consider us dead, so take this node down immediately */
	o2quo_fence_self();
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 
/* Workqueue handler, queued by o2quo_clear_hold() when the last hold is
 * dropped while a decision is pending.  Decides whether this node must
 * fence itself because it can no longer reach a quorum of the nodes that
 * are still heartbeating. */
static void o2quo_make_decision(struct work_struct *work)
{
	int quorum;
	int lowest_hb, lowest_reachable = 0, fence = 0;
	struct o2quo_state *qs = &o2quo_state;

	spin_lock(&qs->qs_lock);

	/* the lowest-numbered heartbeating node is the tie breaker for the
	 * evenly split cluster case below */
	lowest_hb = find_first_bit(qs->qs_hb_bm, O2NM_MAX_NODES);
	if (lowest_hb != O2NM_MAX_NODES)
		lowest_reachable = test_bit(lowest_hb, qs->qs_conn_bm);

	mlog(0, "heartbeating: %d, connected: %d, "
	     "lowest: %d (%sreachable)\n", qs->qs_heartbeating,
	     qs->qs_connected, lowest_hb, lowest_reachable ? "" : "un");

	/* nothing to decide if we aren't heartbeating ourselves, or if we
	 * are the only node left heartbeating */
	if (!test_bit(o2nm_this_node(), qs->qs_hb_bm) ||
	    qs->qs_heartbeating == 1)
		goto out;

	/* NOTE(review): the thresholds below only work if qs_connected
	 * counts the *other* reachable nodes while qs_heartbeating includes
	 * this node -- confirm against the conn_up/hb_up callers */
	if (qs->qs_heartbeating & 1) {
		/* the odd numbered cluster case is straight forward --
		 * if we can't talk to the majority we're hosed */
		quorum = (qs->qs_heartbeating + 1)/2;
		if (qs->qs_connected < quorum) {
			mlog(ML_ERROR, "fencing this node because it is "
			     "only connected to %u nodes and %u is needed "
			     "to make a quorum out of %u heartbeating nodes\n",
			     qs->qs_connected, quorum,
			     qs->qs_heartbeating);
			fence = 1;
		}
	} else {
		/* the even numbered cluster adds the possibility of each half
		 * of the cluster being able to talk amongst themselves.. in
		 * that case we're hosed if we can't talk to the group that has
		 * the lowest numbered node */
		quorum = qs->qs_heartbeating / 2;
		if (qs->qs_connected < quorum) {
			mlog(ML_ERROR, "fencing this node because it is "
			     "only connected to %u nodes and %u is needed "
			     "to make a quorum out of %u heartbeating nodes\n",
			     qs->qs_connected, quorum,
			     qs->qs_heartbeating);
			fence = 1;
		}
		else if ((qs->qs_connected == quorum) &&
			 !lowest_reachable) {
			/* exactly half: only the half containing the lowest
			 * numbered node survives */
			mlog(ML_ERROR, "fencing this node because it is "
			     "connected to a half-quorum of %u out of %u "
			     "nodes which doesn't include the lowest active "
			     "node %u\n", quorum, qs->qs_heartbeating,
			     lowest_hb);
			fence = 1;
		}
	}

out:
	if (fence) {
		/* drop the lock before fencing; o2quo_fence_self() panics
		 * or restarts the machine and does not return */
		spin_unlock(&qs->qs_lock);
		o2quo_fence_self();
	} else {
		/* log the surviving state while still holding the lock so
		 * the reported counts are self-consistent */
		mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
			"connected: %d, lowest: %d (%sreachable)\n",
			qs->qs_heartbeating, qs->qs_connected, lowest_hb,
			lowest_reachable ? "" : "un");
		spin_unlock(&qs->qs_lock);

	}

}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	assert_spin_locked(&qs->qs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	if (!test_and_set_bit(node, qs->qs_hold_bm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 		qs->qs_holds++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 		mlog_bug_on_msg(qs->qs_holds == O2NM_MAX_NODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 			        "node %u\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 		mlog(0, "node %u, %d total\n", node, qs->qs_holds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) static void o2quo_clear_hold(struct o2quo_state *qs, u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	assert_spin_locked(&qs->qs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 	if (test_and_clear_bit(node, qs->qs_hold_bm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 		mlog(0, "node %u, %d total\n", node, qs->qs_holds - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 		if (--qs->qs_holds == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 			if (qs->qs_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 				qs->qs_pending = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 				schedule_work(&qs->qs_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 		mlog_bug_on_msg(qs->qs_holds < 0, "node %u, holds %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 				node, qs->qs_holds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 
/* as a node comes up we delay the quorum decision until we know the fate of
 * the connection.  the hold will be dropped in conn_up or hb_down.  it might be
 * perpetuated by conn_err until hb_down.  if we already have a conn, we might
 * be dropping a hold that conn_up got. */
void o2quo_hb_up(u8 node)
{
	struct o2quo_state *qs = &o2quo_state;

	spin_lock(&qs->qs_lock);

	qs->qs_heartbeating++;
	mlog_bug_on_msg(qs->qs_heartbeating == O2NM_MAX_NODES,
		        "node %u\n", node);
	/* hb_up for an already-heartbeating node is a caller bug */
	mlog_bug_on_msg(test_bit(node, qs->qs_hb_bm), "node %u\n", node);
	set_bit(node, qs->qs_hb_bm);

	mlog(0, "node %u, %d total\n", node, qs->qs_heartbeating);

	/* no connection yet: hold the decision until conn_up or hb_down
	 * resolves it; already connected: drop the hold conn_up took */
	if (!test_bit(node, qs->qs_conn_bm))
		o2quo_set_hold(qs, node);
	else
		o2quo_clear_hold(qs, node);

	spin_unlock(&qs->qs_lock);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) /* hb going down releases any holds we might have had due to this node from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)  * conn_up, conn_err, or hb_up */
void o2quo_hb_down(u8 node)
{
	struct o2quo_state *qs = &o2quo_state;

	spin_lock(&qs->qs_lock);

	qs->qs_heartbeating--;
	mlog_bug_on_msg(qs->qs_heartbeating < 0,
			"node %u, %d heartbeating\n",
			node, qs->qs_heartbeating);
	/* hb_down for a node that wasn't heartbeating is a caller bug */
	mlog_bug_on_msg(!test_bit(node, qs->qs_hb_bm), "node %u\n", node);
	clear_bit(node, qs->qs_hb_bm);

	mlog(0, "node %u, %d total\n", node, qs->qs_heartbeating);

	/* a dead node can't hold up the quorum decision any longer */
	o2quo_clear_hold(qs, node);

	spin_unlock(&qs->qs_lock);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 
/* this tells us that we've decided that the node is still heartbeating
 * even though we've lost its conn.  it must only be called after conn_err
 * and indicates that we must now make a quorum decision in the future,
 * though we might be doing so after waiting for holds to drain.  Here
 * we'll be dropping the hold from conn_err. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) void o2quo_hb_still_up(u8 node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	struct o2quo_state *qs = &o2quo_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 	spin_lock(&qs->qs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	mlog(0, "node %u\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	qs->qs_pending = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	o2quo_clear_hold(qs, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	spin_unlock(&qs->qs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 
/* This is analogous to hb_up.  as a node's connection comes up we delay the
 * quorum decision until we see it heartbeating.  the hold will be dropped in
 * hb_up or hb_down.  it might be perpetuated by conn_err until hb_down.  if
 * it's already heartbeating we might be dropping a hold that conn_up got.
 */
void o2quo_conn_up(u8 node)
{
	struct o2quo_state *qs = &o2quo_state;

	spin_lock(&qs->qs_lock);

	qs->qs_connected++;
	mlog_bug_on_msg(qs->qs_connected == O2NM_MAX_NODES,
		        "node %u\n", node);
	/* conn_up for an already-connected node is a caller bug */
	mlog_bug_on_msg(test_bit(node, qs->qs_conn_bm), "node %u\n", node);
	set_bit(node, qs->qs_conn_bm);

	mlog(0, "node %u, %d total\n", node, qs->qs_connected);

	/* not heartbeating yet: hold the decision until hb_up or hb_down
	 * resolves it; already heartbeating: drop the hold hb_up took */
	if (!test_bit(node, qs->qs_hb_bm))
		o2quo_set_hold(qs, node);
	else
		o2quo_clear_hold(qs, node);

	spin_unlock(&qs->qs_lock);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) /* we've decided that we won't ever be connecting to the node again.  if it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)  * still heartbeating we grab a hold that will delay decisions until either the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)  * node stops heartbeating from hb_down or the caller decides that the node is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)  * still up and calls still_up */
void o2quo_conn_err(u8 node)
{
	struct o2quo_state *qs = &o2quo_state;

	spin_lock(&qs->qs_lock);

	/* only adjust state if we actually had a connection to lose */
	if (test_bit(node, qs->qs_conn_bm)) {
		qs->qs_connected--;
		mlog_bug_on_msg(qs->qs_connected < 0,
				"node %u, connected %d\n",
				node, qs->qs_connected);

		clear_bit(node, qs->qs_conn_bm);

		/* still heartbeating: hold the decision until hb_down or
		 * hb_still_up settles the node's fate */
		if (test_bit(node, qs->qs_hb_bm))
			o2quo_set_hold(qs, node);
	}

	mlog(0, "node %u, %d total\n", node, qs->qs_connected);


	spin_unlock(&qs->qs_lock);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) void o2quo_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	struct o2quo_state *qs = &o2quo_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 	spin_lock_init(&qs->qs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 	INIT_WORK(&qs->qs_work, o2quo_make_decision);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) void o2quo_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 	struct o2quo_state *qs = &o2quo_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 	flush_work(&qs->qs_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) }