Re: Race condition when replacing the broadcast timer

From: Thomas Gleixner
Date: Thu Jun 27 2024 - 07:26:55 EST


On Wed, Jun 26 2024 at 02:17, 朱恺乾 wrote:
> We find a possible race condition when replacing the broadcast
> timer. Here is how the race happend,

> 1. In thread 0, ___tick_broadcast_oneshot_control, timer 0 as a
> broadcast timer is updating the next_event.

> 2. In thread 1, tick_install_broadcast_device, timer 0 is going to be
> replaced by a new timer 1.

> 3. If thread 0 gets the broadcast timer first, it would have the old
> timer returned (timer 0). When thread 1 shuts the old timer down and
> marks it as detached, Thread 0 still have the chance to re-enable the
> old timer with a noop handler if it executes slower than thread 1.

> 4. As the old timer is binded to a CPU, when plug out that CPU, kernel
> fails at clockevents.c:653

Clearly tick_install_broadcast_device() lacks serialization.

The untested patch below should cure that.

Thanks,

tglx
---
kernel/time/clockevents.c | 31 +++++++++++++++++++------------
kernel/time/tick-broadcast.c | 36 ++++++++++++++++++++++--------------
kernel/time/tick-internal.h | 2 ++
3 files changed, 43 insertions(+), 26 deletions(-)

--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -557,23 +557,14 @@ void clockevents_handle_noop(struct cloc
{
}

-/**
- * clockevents_exchange_device - release and request clock devices
- * @old: device to release (can be NULL)
- * @new: device to request (can be NULL)
- *
- * Called from various tick functions with clockevents_lock held and
- * interrupts disabled.
- */
-void clockevents_exchange_device(struct clock_event_device *old,
- struct clock_event_device *new)
+void __clockevents_exchange_device(struct clock_event_device *old,
+ struct clock_event_device *new)
{
/*
* Caller releases a clock event device. We queue it into the
* released list and do a notify add later.
*/
if (old) {
- module_put(old->owner);
clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED);
list_move(&old->list, &clockevents_released);
}
@@ -585,6 +576,22 @@ void clockevents_exchange_device(struct
}

/**
+ * clockevents_exchange_device - release and request clock devices
+ * @old: device to release (can be NULL)
+ * @new: device to request (can be NULL)
+ *
+ * Called from various tick functions with clockevents_lock held and
+ * interrupts disabled.
+ */
+void clockevents_exchange_device(struct clock_event_device *old,
+ struct clock_event_device *new)
+{
+ if (old)
+ module_put(old->owner);
+ __clockevents_exchange_device(old, new);
+}
+
+/**
* clockevents_suspend - suspend clock devices
*/
void clockevents_suspend(void)
@@ -650,7 +657,7 @@ void tick_cleanup_dead_cpu(int cpu)
if (cpumask_test_cpu(cpu, dev->cpumask) &&
cpumask_weight(dev->cpumask) == 1 &&
!tick_is_broadcast_device(dev)) {
- BUG_ON(!clockevent_state_detached(dev));
+ WARN_ON(!clockevent_state_detached(dev));
list_del(&dev->list);
}
}
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -162,23 +162,31 @@ static bool tick_set_oneshot_wakeup_devi
*/
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
- struct clock_event_device *cur = tick_broadcast_device.evtdev;
+ struct clock_event_device *cur;

- if (tick_set_oneshot_wakeup_device(dev, cpu))
- return;
+ scoped_guard(raw_spinlock_irqsave, &tick_broadcast_lock) {

- if (!tick_check_broadcast_device(cur, dev))
- return;
+ if (tick_set_oneshot_wakeup_device(dev, cpu))
+ return;

- if (!try_module_get(dev->owner))
- return;
+ cur = tick_broadcast_device.evtdev;
+ if (!tick_check_broadcast_device(cur, dev))
+ return;

- clockevents_exchange_device(cur, dev);
+ if (!try_module_get(dev->owner))
+ return;
+
+ __clockevents_exchange_device(cur, dev);
+ if (cur)
+ cur->event_handler = clockevents_handle_noop;
+ WRITE_ONCE(tick_broadcast_device.evtdev, dev);
+ if (!cpumask_empty(tick_broadcast_mask))
+ tick_broadcast_start_periodic(dev);
+ }
+
+ /* Module release must be outside of the lock */
if (cur)
- cur->event_handler = clockevents_handle_noop;
- tick_broadcast_device.evtdev = dev;
- if (!cpumask_empty(tick_broadcast_mask))
- tick_broadcast_start_periodic(dev);
+ module_put(old->owner);

if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return;
@@ -1185,7 +1193,7 @@ int tick_broadcast_oneshot_active(void)
*/
bool tick_broadcast_oneshot_available(void)
{
- struct clock_event_device *bc = tick_broadcast_device.evtdev;
+ struct clock_event_device *bc = READ_ONCE(tick_broadcast_device.evtdev);

return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}
@@ -1193,7 +1201,7 @@ bool tick_broadcast_oneshot_available(vo
#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
- struct clock_event_device *bc = tick_broadcast_device.evtdev;
+ struct clock_event_device *bc = READ_ONCE(tick_broadcast_device.evtdev);

if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
return -EBUSY;
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -53,6 +53,8 @@ static inline void clockevent_set_state(
}

extern void clockevents_shutdown(struct clock_event_device *dev);
+extern void __clockevents_exchange_device(struct clock_event_device *old,
+ struct clock_event_device *new);
extern void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new);
extern void clockevents_switch_state(struct clock_event_device *dev,