Re: [patch V2 02/10] hrtimer: Consolidate reprogramming code

From: Marek Szyprowski
Date: Thu Aug 12 2021 - 09:09:49 EST


Hi,

On 13.07.2021 15:39, Thomas Gleixner wrote:
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
>
> This code is mostly duplicated. The redundant store in the force reprogram
> case does no harm and the in hrtimer interrupt condition cannot be true for
> the force reprogram invocations.
>
> Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> kernel/time/hrtimer.c | 72 ++++++++++++++++++++------------------------------
> 1 file changed, 29 insertions(+), 43 deletions(-)
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

This patch landed in today's linux-next (next-20210812) as commit
b14bca97c9f5 ("hrtimer: Consolidate reprogramming code"). It breaks
booting of many of my test machines: ARM 32bit Exynos based boards, ARM
64bit QEmu virt machine or ARM64 Qualcomm DragonBoard410c board.

I've managed to catch the following log on QEmu's virt ARM64 machine:

rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
rcu:     0-...!: (0 ticks this GP) idle=330/0/0x0 softirq=42/42 fqs=0 
(false positive?)
 (detected by 1, t=6502 jiffies, g=-1091, q=115)

============================================
WARNING: possible recursive locking detected
5.14.0-rc5+ #10668 Not tainted
--------------------------------------------
swapper/1/0 is trying to acquire lock:
ffffbb9c1e4ca1d8 (rcu_node_0){-.-.}-{2:2}, at:
rcu_dump_cpu_stacks+0x68/0x1c4

but task is already holding lock:
ffffbb9c1e4ca1d8 (rcu_node_0){-.-.}-{2:2}, at:
rcu_sched_clock_irq+0x83c/0x1778

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(rcu_node_0);
  lock(rcu_node_0);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

1 lock held by swapper/1/0:
 #0: ffffbb9c1e4ca1d8 (rcu_node_0){-.-.}-{2:2}, at:
rcu_sched_clock_irq+0x83c/0x1778

stack backtrace:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.14.0-rc5+ #10668
Hardware name: linux,dummy-virt (DT)
Call trace:
 dump_backtrace+0x0/0x1d0
 show_stack+0x14/0x20
 dump_stack_lvl+0x88/0xb0
 dump_stack+0x14/0x2c
 __lock_acquire+0x17a4/0x1840
 lock_acquire+0x130/0x3e8
 _raw_spin_lock_irqsave+0x78/0x148
 rcu_dump_cpu_stacks+0x68/0x1c4
 rcu_sched_clock_irq+0x11e8/0x1778
 update_process_times+0x88/0xd0
 tick_sched_handle.isra.19+0x30/0x50
 tick_sched_timer+0x48/0x98
 __hrtimer_run_queues+0x380/0x5b0
 hrtimer_interrupt+0xe4/0x240
 arch_timer_handler_virt+0x30/0x40
 handle_percpu_devid_irq+0xc0/0x3d0
 handle_domain_irq+0x58/0x88
 gic_handle_irq+0xa8/0xc8
 call_on_irq_stack+0x28/0x38
 do_interrupt_handler+0x54/0x60
 el1_interrupt+0x2c/0x108
 el1h_64_irq_handler+0x14/0x20
 el1h_64_irq+0x74/0x78
 arch_cpu_idle+0x14/0x20
 default_idle_call+0x88/0x390
 do_idle+0x200/0x290
 cpu_startup_entry+0x20/0x80
 secondary_start_kernel+0x1c0/0x1f0
 __secondary_switched+0x7c/0x80

I hope it helps fixing the issue.

> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -652,21 +652,24 @@ static inline int hrtimer_hres_active(vo
> return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
> }
>
> -/*
> - * Reprogram the event source with checking both queues for the
> - * next event
> - * Called with interrupts disabled and base->lock held
> - */
> static void
> -hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
> +__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
> + struct hrtimer *next_timer, ktime_t expires_next)
> {
> - ktime_t expires_next;
> + /*
> + * If the hrtimer interrupt is running, then it will reevaluate the
> + * clock bases and reprogram the clock event device.
> + */
> + if (cpu_base->in_hrtirq)
> + return;
>
> - expires_next = hrtimer_update_next_event(cpu_base);
> + if (expires_next > cpu_base->expires_next)
> + return;
>
> if (skip_equal && expires_next == cpu_base->expires_next)
> return;
>
> + cpu_base->next_timer = next_timer;
> cpu_base->expires_next = expires_next;
>
> /*
> @@ -689,7 +692,23 @@ hrtimer_force_reprogram(struct hrtimer_c
> if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
> return;
>
> - tick_program_event(cpu_base->expires_next, 1);
> + tick_program_event(expires_next, 1);
> +}
> +
> +/*
> + * Reprogram the event source with checking both queues for the
> + * next event
> + * Called with interrupts disabled and base->lock held
> + */
> +static void
> +hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
> +{
> + ktime_t expires_next;
> +
> + expires_next = hrtimer_update_next_event(cpu_base);
> +
> + __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
> + expires_next);
> }
>
> /* High resolution timer related functions */
> @@ -835,40 +854,7 @@ static void hrtimer_reprogram(struct hrt
> if (base->cpu_base != cpu_base)
> return;
>
> - /*
> - * If the hrtimer interrupt is running, then it will
> - * reevaluate the clock bases and reprogram the clock event
> - * device. The callbacks are always executed in hard interrupt
> - * context so we don't need an extra check for a running
> - * callback.
> - */
> - if (cpu_base->in_hrtirq)
> - return;
> -
> - if (expires >= cpu_base->expires_next)
> - return;
> -
> - /* Update the pointer to the next expiring timer */
> - cpu_base->next_timer = timer;
> - cpu_base->expires_next = expires;
> -
> - /*
> - * If hres is not active, hardware does not have to be
> - * programmed yet.
> - *
> - * If a hang was detected in the last timer interrupt then we
> - * do not schedule a timer which is earlier than the expiry
> - * which we enforced in the hang detection. We want the system
> - * to make progress.
> - */
> - if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
> - return;
> -
> - /*
> - * Program the timer hardware. We enforce the expiry for
> - * events which are already in the past.
> - */
> - tick_program_event(expires, 1);
> + __hrtimer_reprogram(cpu_base, true, timer, expires);
> }
>
> /*
>
>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland