Re: [PATCH] sched/cputime: correct account of irqtime

From: Pingfan Liu
Date: Tue Oct 13 2020 - 00:26:32 EST


On Tue, Oct 13, 2020 at 11:10 AM jun qian <qianjun.kernel@xxxxxxxxx> wrote:
>
> Pingfan Liu <kernelfans@xxxxxxxxx> 于2020年10月12日周一 下午9:54写道:
> >
> > __do_softirq() may be interrupted by hardware interrupts. In this case,
> > irqtime_account_irq() will account the time slice as CPUTIME_SOFTIRQ by
> > mistake.
> >
> > By passing irqtime_account_irq() an extra param about either hardirq or
> > softirq, irqtime_account_irq() can handle the above case.
> >
> > Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx>
> > Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> > Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> > Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
> > Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> > Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
> > Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> > Cc: Ben Segall <bsegall@xxxxxxxxxx>
> > Cc: Mel Gorman <mgorman@xxxxxxx>
> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> > Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> > Cc: Will Deacon <will@xxxxxxxxxx>
> > Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
> > Cc: Frederic Weisbecker <frederic@xxxxxxxxxx>
> > Cc: Allen Pais <allen.lkml@xxxxxxxxx>
> > Cc: Romain Perier <romain.perier@xxxxxxxxx>
> > To: linux-kernel@xxxxxxxxxxxxxxx
> > ---
> > include/linux/hardirq.h | 4 ++--
> > include/linux/vtime.h | 12 ++++++------
> > kernel/sched/cputime.c | 4 ++--
> > kernel/softirq.c | 6 +++---
> > 4 files changed, 13 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
> > index 754f67a..56e7bb5 100644
> > --- a/include/linux/hardirq.h
> > +++ b/include/linux/hardirq.h
> > @@ -32,7 +32,7 @@ static __always_inline void rcu_irq_enter_check_tick(void)
> > */
> > #define __irq_enter() \
> > do { \
> > - account_irq_enter_time(current); \
> > + account_irq_enter_time(current, true); \
> > preempt_count_add(HARDIRQ_OFFSET); \
> > lockdep_hardirq_enter(); \
> > } while (0)
> > @@ -63,7 +63,7 @@ void irq_enter_rcu(void);
> > #define __irq_exit() \
> > do { \
> > lockdep_hardirq_exit(); \
> > - account_irq_exit_time(current); \
> > + account_irq_exit_time(current, true); \
> > preempt_count_sub(HARDIRQ_OFFSET); \
> > } while (0)
> >
> > diff --git a/include/linux/vtime.h b/include/linux/vtime.h
> > index 2cdeca0..294188ae1 100644
> > --- a/include/linux/vtime.h
> > +++ b/include/linux/vtime.h
> > @@ -98,21 +98,21 @@ static inline void vtime_flush(struct task_struct *tsk) { }
> >
> >
> > #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> > -extern void irqtime_account_irq(struct task_struct *tsk);
> > +extern void irqtime_account_irq(struct task_struct *tsk, bool hardirq);
> > #else
> > -static inline void irqtime_account_irq(struct task_struct *tsk) { }
> > +static inline void irqtime_account_irq(struct task_struct *tsk, bool hardirq) { }
> > #endif
> >
> > -static inline void account_irq_enter_time(struct task_struct *tsk)
> > +static inline void account_irq_enter_time(struct task_struct *tsk, bool hardirq)
> > {
> > vtime_account_irq_enter(tsk);
> > - irqtime_account_irq(tsk);
> > + irqtime_account_irq(tsk, hardirq);
> > }
> >
> > -static inline void account_irq_exit_time(struct task_struct *tsk)
> > +static inline void account_irq_exit_time(struct task_struct *tsk, bool hardirq)
> > {
> > vtime_account_irq_exit(tsk);
> > - irqtime_account_irq(tsk);
> > + irqtime_account_irq(tsk, hardirq);
> > }
> >
> > #endif /* _LINUX_KERNEL_VTIME_H */
> > diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> > index 5a55d23..166f1d7 100644
> > --- a/kernel/sched/cputime.c
> > +++ b/kernel/sched/cputime.c
> > @@ -47,7 +47,7 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
> > * Called before incrementing preempt_count on {soft,}irq_enter
> > * and before decrementing preempt_count on {soft,}irq_exit.
> > */
> > -void irqtime_account_irq(struct task_struct *curr)
> > +void irqtime_account_irq(struct task_struct *curr, bool hardirq)
> > {
> > struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
> > s64 delta;
> > @@ -68,7 +68,7 @@ void irqtime_account_irq(struct task_struct *curr)
> > */
> > if (hardirq_count())
> > irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
> > - else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
> > + else if (in_serving_softirq() && curr != this_cpu_ksoftirqd() && !hardirq)
> > irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
> > }
>
> In my opinion, we don't need to use the hardirq flag, the code: if
> (hardirq_count())
> already tell us that where the delt time is from.

Considering the scenario in which hardirq happens immediately after
__do_softirq()->local_irq_enable(). The following code shows that
hardirq_count() can not help.
#define __irq_enter() \
do { \
account_irq_enter_time(current); \
preempt_count_add(HARDIRQ_OFFSET); \
lockdep_hardirq_enter(); \
} while (0)

Anything I missed?

Thanks,
Pingfan