Re: [RFC PATCH] psi: introduce PSI UNINTERRUPTIBLE
From: Zhaoyang Huang
Date: Mon Aug 08 2022 - 05:39:58 EST
On Mon, Aug 8, 2022 at 5:12 PM Chengming Zhou
<zhouchengming@xxxxxxxxxxxxx> wrote:
>
> On 2022/8/8 14:13, zhaoyang.huang wrote:
> > From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
> >
> > Uninterruptible sleep has not been monitored as an important system status yet.
> > Imagin that a set of psi triggers are created for monitoring a special group, while
> > get nothing high for none of the pressures, which could be the processes within
> > are stock in some given resources and turn to be UN status. Introduce PSI_UN as
> > a sub-type among PSI system here.
>
> Hello,
>
> The problem is that not all TASK_UNINTERRUPTIBLE task means stalled on some
> shared resource, like many schedule_timeout() paths.
Thanks for heads up. The aim is to distinguish where the processes go
(on or off cpu? waiting for anything?). It could be deemed as PSI_MEM
like property which is not a precise running time for memory things.
Furthermore, we do have method to make it be precise.
>
> Thanks.
>
> >
> > Signed-off-by: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
> > ---
> > include/linux/psi_types.h | 11 ++++++++---
> > kernel/sched/psi.c | 10 ++++++++++
> > kernel/sched/stats.h | 6 +++++-
> > 3 files changed, 23 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> > index c7fe7c0..8cc1979 100644
> > --- a/include/linux/psi_types.h
> > +++ b/include/linux/psi_types.h
> > @@ -15,6 +15,7 @@ enum psi_task_count {
> > NR_IOWAIT,
> > NR_MEMSTALL,
> > NR_RUNNING,
> > + NR_UNINTERRUPTIBLE,
> > /*
> > * This can't have values other than 0 or 1 and could be
> > * implemented as a bit flag. But for now we still have room
> > @@ -32,7 +33,7 @@ enum psi_task_count {
> > * threads and memstall ones.
> > */
> > NR_MEMSTALL_RUNNING,
> > - NR_PSI_TASK_COUNTS = 5,
> > + NR_PSI_TASK_COUNTS = 6,
> > };
> >
> > /* Task state bitmasks */
> > @@ -41,13 +42,15 @@ enum psi_task_count {
> > #define TSK_RUNNING (1 << NR_RUNNING)
> > #define TSK_ONCPU (1 << NR_ONCPU)
> > #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
> > +#define TSK_UNINTERRUPTIBLE (1 << NR_UNINTERRUPTIBLE)
> >
> > /* Resources that workloads could be stalled on */
> > enum psi_res {
> > PSI_IO,
> > PSI_MEM,
> > PSI_CPU,
> > - NR_PSI_RESOURCES = 3,
> > + PSI_UN,
> > + NR_PSI_RESOURCES = 4,
> > };
> >
> > /*
> > @@ -63,9 +66,11 @@ enum psi_states {
> > PSI_MEM_FULL,
> > PSI_CPU_SOME,
> > PSI_CPU_FULL,
> > + PSI_UN_SOME,
> > + PSI_UN_FULL,
> > /* Only per-CPU, to weigh the CPU in the global average: */
> > PSI_NONIDLE,
> > - NR_PSI_STATES = 7,
> > + NR_PSI_STATES = 9,
> > };
> >
> > enum psi_aggregators {
> > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > index a337f3e..a37b4a4 100644
> > --- a/kernel/sched/psi.c
> > +++ b/kernel/sched/psi.c
> > @@ -231,6 +231,10 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
> > return unlikely(tasks[NR_RUNNING] > tasks[NR_ONCPU]);
> > case PSI_CPU_FULL:
> > return unlikely(tasks[NR_RUNNING] && !tasks[NR_ONCPU]);
> > + case PSI_UN_SOME:
> > + return unlikely(tasks[NR_UNINTERRUPTIBLE]);
> > + case PSI_UN_FULL:
> > + return unlikely(tasks[NR_UNINTERRUPTIBLE] && !tasks[NR_RUNNING]);
> > case PSI_NONIDLE:
> > return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
> > tasks[NR_RUNNING];
> > @@ -683,6 +687,12 @@ static void record_times(struct psi_group_cpu *groupc, u64 now)
> > groupc->times[PSI_CPU_FULL] += delta;
> > }
> >
> > + if (groupc->state_mask & (1 << PSI_UN_SOME)) {
> > + groupc->times[PSI_UN_SOME] += delta;
> > + if (groupc->state_mask & (1 << PSI_UN_FULL))
> > + groupc->times[PSI_UN_FULL] += delta;
> > + }
> > +
> > if (groupc->state_mask & (1 << PSI_NONIDLE))
> > groupc->times[PSI_NONIDLE] += delta;
> > }
> > diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
> > index baa839c..bf98829 100644
> > --- a/kernel/sched/stats.h
> > +++ b/kernel/sched/stats.h
> > @@ -132,6 +132,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
> > if (p->in_iowait)
> > clear |= TSK_IOWAIT;
> > }
> > + clear |= TSK_UNINTERRUPTIBLE;
> >
> > psi_task_change(p, clear, set);
> > }
> > @@ -139,6 +140,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
> > static inline void psi_dequeue(struct task_struct *p, bool sleep)
> > {
> > int clear = TSK_RUNNING;
> > + int set = 0;
> >
> > if (static_branch_likely(&psi_disabled))
> > return;
> > @@ -154,8 +156,10 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
> >
> > if (p->in_memstall)
> > clear |= (TSK_MEMSTALL | TSK_MEMSTALL_RUNNING);
> > + if (READ_ONCE(p->__state) & TASK_UNINTERRUPTIBLE)
> > + set = TSK_UNINTERRUPTIBLE;
> >
> > - psi_task_change(p, clear, 0);
> > + psi_task_change(p, clear, set);
> > }
> >
> > static inline void psi_ttwu_dequeue(struct task_struct *p)