Re: [PATCH] clockevents: Prevent timer interrupt starvation

From: Thomas Gleixner

Date: Fri Apr 03 2026 - 12:26:11 EST


On Fri, Apr 03 2026 at 14:16, Peter Zijlstra wrote:
> On Thu, Apr 02, 2026 at 07:07:49PM +0200, Thomas Gleixner wrote:
> /**
> @@ -324,16 +325,31 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
> return dev->set_next_ktime(expires, dev);
>
> delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
> - if (delta <= 0)
> - return force ? clockevents_program_min_delta(dev) : -ETIME;
> + if (delta <= 0) {
> + rc = -ETIME;
> + goto error;
> + }

That does not work in the case where user space manages to set the expiry
time so that it stays within the min_delta_ns window, which is doable. I just
tried it. Then we are back to square one.

Less convoluted but untested version of my initial idea below.

Thanks,

tglx
---
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -80,6 +80,7 @@ enum clock_event_state {
* @shift: nanoseconds to cycles divisor (power of two)
* @state_use_accessors:current state of the device, assigned by the core code
* @features: features
+ * @next_event_forced: True if the last programming was a forced event
* @retries: number of forced programming retries
* @set_state_periodic: switch state to periodic
* @set_state_oneshot: switch state to oneshot
@@ -108,6 +109,7 @@ struct clock_event_device {
u32 shift;
enum clock_event_state state_use_accessors;
unsigned int features;
+ unsigned int next_event_forced;
unsigned long retries;

int (*set_state_periodic)(struct clock_event_device *);
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -172,6 +172,7 @@ void clockevents_shutdown(struct clock_e
{
clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
dev->next_event = KTIME_MAX;
+ dev->next_event_forced = 0;
}

/**
@@ -224,13 +225,7 @@ static int clockevents_increase_min_delt
return 0;
}

-/**
- * clockevents_program_min_delta - Set clock event device to the minimum delay.
- * @dev: device to program
- *
- * Returns 0 on success, -ETIME when the retry loop failed.
- */
-static int clockevents_program_min_delta(struct clock_event_device *dev)
+static int __clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
int64_t delta;
@@ -263,13 +258,7 @@ static int clockevents_program_min_delta

#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */

-/**
- * clockevents_program_min_delta - Set clock event device to the minimum delay.
- * @dev: device to program
- *
- * Returns 0 on success, -ETIME when the retry loop failed.
- */
-static int clockevents_program_min_delta(struct clock_event_device *dev)
+static int __clockevents_program_min_delta(struct clock_event_device *dev)
{
unsigned long long clc;
int64_t delta = 0;
@@ -293,6 +282,21 @@ static int clockevents_program_min_delta
#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */

/**
+ * clockevents_program_min_delta - Set clock event device to the minimum delay.
+ * @dev: device to program
+ *
+ * Returns 0 on success, -ETIME when the retry loop failed.
+ */
+static int clockevents_program_min_delta(struct clock_event_device *dev)
+{
+ if (dev->next_event_forced)
+ return 0;
+
+ dev->next_event_forced = 1;
+ return __clockevents_program_min_delta(dev);
+}
+
+/**
* clockevents_program_event - Reprogram the clock event device.
* @dev: device to program
* @expires: absolute expiry time (monotonic clock)
@@ -324,16 +328,18 @@ int clockevents_program_event(struct clo
return dev->set_next_ktime(expires, dev);

delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
- if (delta <= 0)
- return force ? clockevents_program_min_delta(dev) : -ETIME;

- delta = min(delta, (int64_t) dev->max_delta_ns);
- delta = max(delta, (int64_t) dev->min_delta_ns);
+ if (!dev->next_event_forced || delta > dev->min_delta_ns) {

- clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
- rc = dev->set_next_event((unsigned long) clc, dev);
+ delta = min(delta, (int64_t) dev->max_delta_ns);
+ delta = max(delta, (int64_t) dev->min_delta_ns);
+ clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+ rc = dev->set_next_event((unsigned long) clc, dev);
+ if (!rc)
+ return 0;
+ }

- return (rc && force) ? clockevents_program_min_delta(dev) : rc;
+ return force ? clockevents_program_min_delta(dev) : rc;
}

/*
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1888,6 +1888,7 @@ void hrtimer_interrupt(struct clock_even
BUG_ON(!cpu_base->hres_active);
cpu_base->nr_events++;
dev->next_event = KTIME_MAX;
+ dev->next_event_forced = 0;

raw_spin_lock_irqsave(&cpu_base->lock, flags);
entry_time = now = hrtimer_update_base(cpu_base);
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -110,6 +110,7 @@ void tick_handle_periodic(struct clock_e
int cpu = smp_processor_id();
ktime_t next = dev->next_event;

+ dev->next_event_forced = 0;
tick_periodic(cpu);

/*
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1513,6 +1513,7 @@ static void tick_nohz_lowres_handler(str
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

dev->next_event = KTIME_MAX;
+ dev->next_event_forced = 0;

if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART))
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);