[PATCH v2 08/12] rv: Ensure synchronous cleanup for HA monitors
From: Gabriele Monaco
Date: Wed May 27 2026 - 02:27:28 EST
HA monitors may start timers, all cleanup functions currently stop the
timers asynchronously to avoid sleeping in the wrong context.
Nothing makes sure running callbacks terminate on cleanup.
Run the entire HA timer callback in an RCU read-side critical section,
this way we can simply synchronize_rcu() with any pending timer and are
sure any cleanup using kfree_rcu() runs after callbacks terminated.
Additionally make sure any unlikely callback running late won't run any
code if the monitor is marked as disabled.
Use memory barriers to serialise with racing resets.
Fixes: f5587d1b6ec9 ("rv: Add Hybrid Automata monitor type")
Fixes: 4a24127bd6cb ("rv: Add support for per-object monitors in DA/HA")
Signed-off-by: Gabriele Monaco <gmonaco@xxxxxxxxxx>
---
include/rv/da_monitor.h | 26 +++++++++++++++++++++-----
include/rv/ha_monitor.h | 19 +++++++++++++++++--
2 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index c1e347f9deaf..268f22933663 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -57,6 +57,15 @@ static struct rv_monitor rv_this;
#define da_monitor_reset_hook(da_mon)
#endif
+/*
+ * Hook to allow the implementation of hybrid automata: define it with a
+ * function that waits for the termination of all monitors background
+ * activities (e.g. all timers). This hook can sleep.
+ */
+#ifndef da_monitor_sync_hook
+#define da_monitor_sync_hook()
+#endif
+
/*
* Type for the target id, default to int but can be overridden.
* A long type can work as hash table key (PER_OBJ) but will be downgraded to
@@ -83,7 +92,8 @@ static inline void da_monitor_reset(struct da_monitor *da_mon)
{
da_monitor_reset_hook(da_mon);
WRITE_ONCE(da_mon->monitoring, 0);
- da_mon->curr_state = model_get_initial_state();
+ /* Pair with load in __ha_monitor_timer_callback */
+ smp_store_release(&da_mon->curr_state, model_get_initial_state());
}
/*
@@ -181,6 +191,7 @@ static inline int da_monitor_init(void)
static inline void da_monitor_destroy(void)
{
da_monitor_reset_all();
+ da_monitor_sync_hook();
}
#ifndef da_implicit_guard
@@ -234,6 +245,7 @@ static inline int da_monitor_init(void)
static inline void da_monitor_destroy(void)
{
da_monitor_reset_all();
+ da_monitor_sync_hook();
}
#ifndef da_implicit_guard
@@ -324,6 +336,7 @@ static inline void da_monitor_destroy(void)
}
da_monitor_reset_all();
+ da_monitor_sync_hook();
tracepoint_synchronize_unregister();
rv_put_task_monitor_slot(task_mon_slot);
@@ -503,10 +516,9 @@ static void da_monitor_reset_all(void)
struct da_monitor_storage *mon_storage;
int bkt;
- rcu_read_lock();
+ guard(rcu)();
hash_for_each_rcu(da_monitor_ht, bkt, mon_storage, node)
da_monitor_reset(&mon_storage->rv.da_mon);
- rcu_read_unlock();
}
static inline int da_monitor_init(void)
@@ -522,13 +534,17 @@ static inline void da_monitor_destroy(void)
int bkt;
tracepoint_synchronize_unregister();
+ scoped_guard(rcu) {
+ hash_for_each_rcu(da_monitor_ht, bkt, mon_storage, node) {
+ da_monitor_reset_hook(&mon_storage->rv.da_mon);
+ }
+ }
+ da_monitor_sync_hook();
/*
* This function is called after all probes are disabled and no longer
* pending, we can safely assume no concurrent user.
*/
- synchronize_rcu();
hash_for_each_safe(da_monitor_ht, bkt, tmp, mon_storage, node) {
- da_monitor_reset_hook(&mon_storage->rv.da_mon);
hash_del_rcu(&mon_storage->node);
kfree(mon_storage);
}
diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h
index d59507e8cb30..661631bc933a 100644
--- a/include/rv/ha_monitor.h
+++ b/include/rv/ha_monitor.h
@@ -36,6 +36,7 @@ static bool ha_monitor_handle_constraint(struct da_monitor *da_mon,
#define da_monitor_event_hook ha_monitor_handle_constraint
#define da_monitor_init_hook ha_monitor_init_env
#define da_monitor_reset_hook ha_monitor_reset_env
+#define da_monitor_sync_hook() synchronize_rcu()
#include <rv/da_monitor.h>
#include <linux/seq_buf.h>
@@ -237,12 +238,26 @@ static bool ha_monitor_handle_constraint(struct da_monitor *da_mon,
return false;
}
+/*
+ * __ha_monitor_timer_callback - generic callback representation
+ *
+ * This callback runs in an RCU read-side critical section to allow the
+ * destruction sequence to easily synchronize_rcu() with all pending timer
+ * after asynchronously disabling them.
+ */
static inline void __ha_monitor_timer_callback(struct ha_monitor *ha_mon)
{
- enum states curr_state = READ_ONCE(ha_mon->da_mon.curr_state);
DECLARE_SEQ_BUF(env_string, ENV_BUFFER_SIZE);
- u64 time_ns = ha_get_ns();
+ enum states curr_state;
+ u64 time_ns;
+
+ guard(rcu)();
+ /* Ensure consistent curr_state if we race with da_monitor_reset */
+ curr_state = smp_load_acquire(&ha_mon->da_mon.curr_state);
+ if (unlikely(!da_monitor_handling_event(&ha_mon->da_mon)))
+ return;
+ time_ns = ha_get_ns();
ha_get_env_string(&env_string, ha_mon, time_ns);
ha_react(curr_state, EVENT_NONE, env_string.buffer);
ha_trace_error_env(ha_mon, model_get_state_name(curr_state),
--
2.54.0