[PATCH v3 1/2] taskstats: retain dead thread stats in TGID queries

From: Yiyang Chen

Date: Mon Apr 13 2026 - 11:48:48 EST


fill_stats_for_tgid() builds TGID stats from two sources: the cached
aggregate in signal->stats and a scan of the live threads in the group.

However, fill_tgid_exit() only accumulates delay accounting into
signal->stats. This means that once a thread exits, TGID queries lose
that thread's contribution to the other fields that
fill_stats_for_tgid() adds for live threads.

This gap was introduced incrementally by two earlier changes that
extended fill_stats_for_tgid() but did not make the corresponding
update to fill_tgid_exit():

- commit 8c733420bdd5 ("taskstats: add e/u/stime for TGID command")
  added ac_etime, ac_utime, and ac_stime to the TGID query path.
- commit b663a79c1915 ("taskstats: add context-switch counters")
  added nvcsw and nivcsw to the TGID query path.

As a result, those fields are accounted for live threads in TGID
queries, but are never added to the cached TGID aggregate when a thread
exits. The final TGID exit notification, emitted when group_dead is
true, also copies that cached aggregate, so it is missing the same
fields.

Factor the per-task TGID accumulation into tgid_stats_add_task() and
use it in both fill_stats_for_tgid() and fill_tgid_exit(). This keeps
the cached aggregate used for dead threads aligned with the live-thread
accumulation used by TGID queries.

Fixes: 8c733420bdd5 ("taskstats: add e/u/stime for TGID command")
Fixes: b663a79c1915 ("taskstats: add context-switch counters")
Cc: stable@xxxxxxxxxxxxxxx
Acked-by: Balbir Singh <balbirs@xxxxxxxxxx>
Signed-off-by: Yiyang Chen <cyyzero16@xxxxxxxxx>

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 0cd680ccc7e5..f572f27a5828 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -210,13 +210,39 @@ static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
return 0;
}

+static void tgid_stats_add_task(struct taskstats *stats,
+ struct task_struct *tsk, u64 now_ns)
+{
+ u64 delta, utime, stime;
+
+ /*
+ * Add tsk's per-task stats to the per-tgid totals in stats.
+ * Each accounting subsystem calls its accumulation function here:
+ *
+ * per-task-foo(stats, tsk);
+ */
+ delayacct_add_tsk(stats, tsk);
+
+ /* task elapsed time in nsec, measured up to the caller's now_ns */
+ delta = now_ns - tsk->start_time;
+ /* Convert to microseconds; do_div() updates delta in place */
+ do_div(delta, NSEC_PER_USEC);
+ stats->ac_etime += delta;
+
+ task_cputime(tsk, &utime, &stime);
+ stats->ac_utime += div_u64(utime, NSEC_PER_USEC);
+ stats->ac_stime += div_u64(stime, NSEC_PER_USEC);
+
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
+}
+
static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
{
struct task_struct *tsk, *first;
unsigned long flags;
int rc = -ESRCH;
- u64 delta, utime, stime;
- u64 start_time;
+ u64 now_ns;

/*
* Add additional stats from live tasks except zombie thread group
@@ -233,30 +259,12 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
else
memset(stats, 0, sizeof(*stats));

- start_time = ktime_get_ns();
+ now_ns = ktime_get_ns();
for_each_thread(first, tsk) {
if (tsk->exit_state)
continue;
- /*
- * Accounting subsystem can call its functions here to
- * fill in relevant parts of struct taskstsats as follows
- *
- * per-task-foo(stats, tsk);
- */
- delayacct_add_tsk(stats, tsk);
-
- /* calculate task elapsed time in nsec */
- delta = start_time - tsk->start_time;
- /* Convert to micro seconds */
- do_div(delta, NSEC_PER_USEC);
- stats->ac_etime += delta;

- task_cputime(tsk, &utime, &stime);
- stats->ac_utime += div_u64(utime, NSEC_PER_USEC);
- stats->ac_stime += div_u64(stime, NSEC_PER_USEC);
-
- stats->nvcsw += tsk->nvcsw;
- stats->nivcsw += tsk->nivcsw;
+ tgid_stats_add_task(stats, tsk, now_ns);
}

unlock_task_sighand(first, &flags);
@@ -275,18 +283,14 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
static void fill_tgid_exit(struct task_struct *tsk)
{
unsigned long flags;
+ u64 now_ns;

spin_lock_irqsave(&tsk->sighand->siglock, flags);
if (!tsk->signal->stats)
goto ret;

- /*
- * Each accounting subsystem calls its functions here to
- * accumalate its per-task stats for tsk, into the per-tgid structure
- *
- * per-task-foo(tsk->signal->stats, tsk);
- */
- delayacct_add_tsk(tsk->signal->stats, tsk);
+ now_ns = ktime_get_ns();
+ tgid_stats_add_task(tsk->signal->stats, tsk, now_ns);
ret:
spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
return;
--
2.43.0