Re: [PATCH v4] vmstat: Kernel stack usage histogram

From: Pasha Tatashin
Date: Thu Jul 18 2024 - 22:56:07 EST


On Thu, Jul 18, 2024 at 7:36 PM Shakeel Butt <shakeel.butt@xxxxxxxxx> wrote:
>
> On Thu, Jul 18, 2024 at 08:26:11PM GMT, Pasha Tatashin wrote:
> [...]
> > diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
> > index ccd72b978e1f..65e8c9fb7f9b 100644
> > --- a/include/linux/sched/task_stack.h
> > +++ b/include/linux/sched/task_stack.h
> > @@ -95,9 +95,51 @@ static inline int object_is_on_stack(const void *obj)
> > extern void thread_stack_cache_init(void);
> >
> > #ifdef CONFIG_DEBUG_STACK_USAGE
> > +#ifdef CONFIG_VM_EVENT_COUNTERS
> > +#include <linux/vm_event_item.h>
> > +
> > +/* Count the maximum pages reached in kernel stacks */
> > +static inline void kstack_histogram(unsigned long used_stack)
>
> Any specific reason to add this function in header?

For performance reasons: it keeps kstack_histogram() inlined into
stack_not_used(), which is also defined as an inline function in this
header.

>
> > +{
> > + if (used_stack <= 1024)
> > + this_cpu_inc(vm_event_states.event[KSTACK_1K]);
>
> Why not count_vm_event(KSTACK_1K)? Avoiding header include recursion?

I could not include "linux/vmstat.h" in "linux/sched/task_stack.h"
because it introduces dependencies such as linux/mm.h, linux/fs.h,
and uapi/linux/stat.h, and even when all of those are added it still
fails to compile on some architectures. So it was simpler to stop
resolving the conflicts and use this_cpu_inc() directly.

>
> > +#if THREAD_SIZE > 1024
> > + else if (used_stack <= 2048)
> > + this_cpu_inc(vm_event_states.event[KSTACK_2K]);
> > +#endif
> > +#if THREAD_SIZE > 2048
> > + else if (used_stack <= 4096)
> > + this_cpu_inc(vm_event_states.event[KSTACK_4K]);
> > +#endif
> > +#if THREAD_SIZE > 4096
> > + else if (used_stack <= 8192)
> > + this_cpu_inc(vm_event_states.event[KSTACK_8K]);
> > +#endif
> > +#if THREAD_SIZE > 8192
> > + else if (used_stack <= 16384)
> > + this_cpu_inc(vm_event_states.event[KSTACK_16K]);
> > +#endif
> > +#if THREAD_SIZE > 16384
> > + else if (used_stack <= 32768)
> > + this_cpu_inc(vm_event_states.event[KSTACK_32K]);
> > +#endif
> > +#if THREAD_SIZE > 32768
> > + else if (used_stack <= 65536)
> > + this_cpu_inc(vm_event_states.event[KSTACK_64K]);
> > +#endif
> > +#if THREAD_SIZE > 65536
> > + else
> > + this_cpu_inc(vm_event_states.event[KSTACK_REST]);
> > +#endif
> > +}
> > +#else /* !CONFIG_VM_EVENT_COUNTERS */
> > +static inline void kstack_histogram(unsigned long used_stack) {}
> > +#endif /* CONFIG_VM_EVENT_COUNTERS */
> > +
> > static inline unsigned long stack_not_used(struct task_struct *p)
> > {
> > unsigned long *n = end_of_stack(p);
> > + unsigned long unused_stack;
> >
> > do { /* Skip over canary */
> > # ifdef CONFIG_STACK_GROWSUP
> > @@ -108,10 +150,13 @@ static inline unsigned long stack_not_used(struct task_struct *p)
> > } while (!*n);
> >
> > # ifdef CONFIG_STACK_GROWSUP
> > - return (unsigned long)end_of_stack(p) - (unsigned long)n;
> > + unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n;
> > # else
> > - return (unsigned long)n - (unsigned long)end_of_stack(p);
> > + unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p);
> > # endif
> > + kstack_histogram(THREAD_SIZE - unused_stack);
> > +
> > + return unused_stack;
> > }
> > #endif
> > extern void set_task_stack_end_magic(struct task_struct *tsk);
>