[PATCH] ocfs2/cluster: reject local node clears while heartbeat runs
From: Cen Zhang
Date: Sun Jun 14 2026 - 01:34:03 EST
o2nm_node_local_store() can clear the current local node while a
heartbeat region is still active. That path stops listening and stores
O2NM_INVALID_NODE_NUM, but heartbeat threads keep calling
o2nm_this_node() from o2hb_do_disk_heartbeat() and shutdown.
Once that happens, o2hb_check_own_slot() and o2hb_prepare_block() can
use node 255 as a slot index, and o2nm_undepend_this_node() can BUG_ON()
when the thread exits. The same local=0 path also leaves cl_has_local
set, so later attempts to restore a real local node fail with -EBUSY.
Track the heartbeat start/run/stop window with a per-region active flag
protected by o2hb_live_lock. Set it before kthread_run() can let the
heartbeat thread re-read o2nm_this_node(), and clear it only after
startup cleanup or region teardown has stopped any created thread. Then
reject local=0 while any region remains active, and when the clear is
allowed drop cl_has_local together with cl_local_node so the
nodemanager state matches o2nm_node_group_drop_item().
The buggy scenario involves two paths, with each column showing the
order within that path:
configfs local attribute store heartbeat thread
1. Userspace writes 0 to the 1. o2hb_thread() keeps calling
current node's local attribute. o2hb_do_disk_heartbeat().
2. o2nm_node_local_store() 2. o2hb_check_own_slot() and
stops listening and stores o2hb_prepare_block() re-read
O2NM_INVALID_NODE_NUM. o2nm_this_node().
3. cl_has_local still stays set, 3. The thread uses node 255 as
so later o2nm_this_node() the local slot index or
reports node 255. shutdown node number.
4. A later local=1 write hits 4. Heartbeat can touch
-EBUSY. reg->hr_slots[255] or BUG_ON()
in o2nm_undepend_this_node().
Validation reproduced this kernel report:
KASAN slab-out-of-bounds in o2hb_do_disk_heartbeat+0x372/0xb30
RIP: 0010:memset+0xf/0x20
Read of size 8
Call trace:
dump_stack_lvl+0x66/0xa0
print_report+0xd0/0x630
o2hb_do_disk_heartbeat+0x372/0xb30 (fs/ocfs2/cluster/heartbeat.c:1079)
srso_alias_return_thunk+0x5/0xfbef5
__virt_addr_valid+0x188/0x2f0
kasan_report+0xe4/0x120
o2hb_do_disk_heartbeat+0x5/0xb30 (fs/ocfs2/cluster/heartbeat.c:1079)
o2hb_thread+0x14e/0x770
kthread_affine_node+0x139/0x180
lockdep_hardirqs_on_prepare+0xda/0x190
trace_hardirqs_on+0x18/0x130
kthread+0x19d/0x1e0
ret_from_fork+0x37a/0x4d0
__switch_to+0x2d5/0x6f0
ret_from_fork_asm+0x1a/0x30
Fixes: a7f6a5fb4bde ("[PATCH] OCFS2: The Second Oracle Cluster Filesystem")
Assisted-by: Codex:gpt-5.5
Signed-off-by: Cen Zhang <zzzccc427@xxxxxxxxx>
---
fs/ocfs2/cluster/heartbeat.c | 55 +++++++++++++++++++++++++++++++---
fs/ocfs2/cluster/heartbeat.h | 1 +
fs/ocfs2/cluster/nodemanager.c | 13 ++++----
fs/ocfs2/cluster/nodemanager.h | 3 ++
4 files changed, 63 insertions(+), 9 deletions(-)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index d12784aaaa4b..4c83f680017f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -201,8 +201,9 @@ struct o2hb_region {
hr_item_dropped:1,
hr_node_deleted:1;
- /* protected by the hr_callback_sem */
+ /* Protected by o2hb_live_lock. */
struct task_struct *hr_task;
+ bool hr_heartbeat_active;
unsigned int hr_blocks;
unsigned long long hr_start_block;
@@ -1771,8 +1772,9 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
}
/*
- * this is acting as commit; we set up all of hr_bdev_file and hr_task or
- * nothing
+ * This acts as commit. The active flag covers the start/run/stop window and
+ * startup publishes it before rechecking the local node, so local=0 either
+ * sees heartbeat in progress or forces this path to fail before a thread runs.
*/
static ssize_t o2hb_region_dev_store(struct config_item *item,
const char *page,
@@ -1874,6 +1876,17 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
/* unsteady_iterations is triple the steady_iterations */
atomic_set(®->hr_unsteady_iterations, (live_threshold * 3));
+ o2nm_lock_subsystem();
+ spin_lock(&o2hb_live_lock);
+ reg->hr_heartbeat_active = true;
+ spin_unlock(&o2hb_live_lock);
+ if (o2nm_this_node() == O2NM_MAX_NODES) {
+ o2nm_unlock_subsystem();
+ ret = -EINVAL;
+ goto out;
+ }
+ o2nm_unlock_subsystem();
+
hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
reg->hr_item.ci_name);
if (IS_ERR(hb_task)) {
@@ -1925,10 +1938,16 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
spin_lock(&o2hb_live_lock);
hb_task = reg->hr_task;
reg->hr_task = NULL;
+ if (!hb_task)
+ reg->hr_heartbeat_active = false;
spin_unlock(&o2hb_live_lock);
- if (hb_task)
+ if (hb_task) {
kthread_stop(hb_task);
+ spin_lock(&o2hb_live_lock);
+ reg->hr_heartbeat_active = false;
+ spin_unlock(&o2hb_live_lock);
+ }
o2hb_unmap_slot_data(reg);
@@ -2109,6 +2128,11 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
if (hb_task)
kthread_stop(hb_task);
+ spin_lock(&o2hb_live_lock);
+ if (hb_task)
+ reg->hr_heartbeat_active = false;
+ spin_unlock(&o2hb_live_lock);
+
if (o2hb_global_heartbeat_active()) {
spin_lock(&o2hb_live_lock);
clear_bit(reg->hr_region_num, o2hb_region_bitmap);
@@ -2571,6 +2595,29 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions)
}
EXPORT_SYMBOL_GPL(o2hb_get_all_regions);
+/*
+ * The active flag covers the entire start/run/stop window in which a region
+ * may still have a heartbeat thread that can re-read o2nm_this_node().
+ */
+int o2hb_heartbeat_active(void)
+{
+ struct o2hb_region *reg;
+ int active = 0;
+
+ spin_lock(&o2hb_live_lock);
+
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ if (reg->hr_heartbeat_active) {
+ active = 1;
+ break;
+ }
+ }
+
+ spin_unlock(&o2hb_live_lock);
+
+ return active;
+}
+
int o2hb_global_heartbeat_active(void)
{
return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL);
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 8ef8c1b9eeb7..7040dc4b529d 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -66,6 +66,7 @@ int o2hb_check_node_heartbeating_no_sem(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
void o2hb_stop_all_regions(void);
int o2hb_get_all_regions(char *region_uuids, u8 numregions);
+int o2hb_heartbeat_active(void);
int o2hb_global_heartbeat_active(void);
#endif /* O2CLUSTER_HEARTBEAT_H */
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 402563154550..ffe9cb37d9e5 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -25,9 +25,6 @@ static const char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
"panic", /* O2NM_FENCE_PANIC */
};
-static inline void o2nm_lock_subsystem(void);
-static inline void o2nm_unlock_subsystem(void);
-
struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
{
struct o2nm_node *node = NULL;
@@ -366,7 +363,13 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page,
if (!tmp && cluster->cl_has_local &&
cluster->cl_local_node == node->nd_num) {
+ if (o2hb_heartbeat_active()) {
+ ret = -EBUSY;
+ goto out;
+ }
+
o2net_stop_listening(node);
+ cluster->cl_has_local = 0;
cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
}
@@ -762,12 +765,12 @@ static struct o2nm_cluster_group o2nm_cluster_group = {
},
};
-static inline void o2nm_lock_subsystem(void)
+void o2nm_lock_subsystem(void)
{
mutex_lock(&o2nm_cluster_group.cs_subsys.su_mutex);
}
-static inline void o2nm_unlock_subsystem(void)
+void o2nm_unlock_subsystem(void)
{
mutex_unlock(&o2nm_cluster_group.cs_subsys.su_mutex);
}
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index 3490e77a952d..96c0abb1f127 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -63,6 +63,9 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr);
void o2nm_node_get(struct o2nm_node *node);
void o2nm_node_put(struct o2nm_node *node);
+void o2nm_lock_subsystem(void);
+void o2nm_unlock_subsystem(void);
+
int o2nm_depend_item(struct config_item *item);
void o2nm_undepend_item(struct config_item *item);
int o2nm_depend_this_node(void);
--
2.43.0