Re: [PATCH 2/6] sched/vtime: Bring up complete kcpustat accessor

From: Chris Wilson
Date: Sat Dec 28 2019 - 15:57:39 EST


Quoting Frederic Weisbecker (2019-11-21 02:44:26)
> +static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
> + const struct kernel_cpustat *src,
> + struct task_struct *tsk, int cpu)
> +{
> + struct vtime *vtime = &tsk->vtime;
> + unsigned int seq;
> + int err;
> +
> + do {
> + u64 *cpustat;
> + u64 delta;
> +
> + seq = read_seqcount_begin(&vtime->seqcount);
> +
> + err = vtime_state_check(vtime, cpu);
> + if (err < 0)
> + return err;
> +
> + *dst = *src;
> + cpustat = dst->cpustat;
> +
> + /* Task is sleeping, dead or idle, nothing to add */
> + if (vtime->state < VTIME_SYS)
> + continue;
> +
> + delta = vtime_delta(vtime);
> +
> + /*
> + * Task runs either in user (including guest) or kernel space,
> + * add pending nohz time to the right place.
> + */
> + if (vtime->state == VTIME_SYS) {
> + cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
> + } else if (vtime->state == VTIME_USER) {
> + if (task_nice(tsk) > 0)
> + cpustat[CPUTIME_NICE] += vtime->utime + delta;
> + else
> + cpustat[CPUTIME_USER] += vtime->utime + delta;
> + } else {
> + WARN_ON_ONCE(vtime->state != VTIME_GUEST);

I'm randomly hitting this WARN on a non-virtualised system reading
/proc/stat.

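vtime->state is updated under the write_seqcount, so the access here is
deliberately racy, and the change in vtime->state would be picked up by
the seqcount retry. However, vtime->state is re-read for each comparison
inside the loop, so it can change between the checks and trip the WARN
before the retry is ever reached.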

Quick suggestion would be something along the lines of

static int vtime_state_check(struct vtime *vtime, int cpu)
{
+ int state = READ_ONCE(vtime->state);
+
/*
* We raced against a context switch, fetch the
* kcpustat task again.
@@ -930,10 +932,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu)
*
* Case 1) is ok but 2) is not. So wait for a safe VTIME state.
*/
- if (vtime->state == VTIME_INACTIVE)
+ if (state == VTIME_INACTIVE)
return -EAGAIN;

- return 0;
+ return state;
}

static u64 kcpustat_user_vtime(struct vtime *vtime)
@@ -1055,7 +1057,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
cpustat = dst->cpustat;

/* Task is sleeping, dead or idle, nothing to add */
- if (vtime->state < VTIME_SYS)
+ if (err < VTIME_SYS)
continue;

delta = vtime_delta(vtime);
@@ -1064,15 +1066,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
* Task runs either in user (including guest) or kernel space,
* add pending nohz time to the right place.
*/
- if (vtime->state == VTIME_SYS) {
+ if (err == VTIME_SYS) {
cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
- } else if (vtime->state == VTIME_USER) {
+ } else if (err == VTIME_USER) {
if (task_nice(tsk) > 0)
cpustat[CPUTIME_NICE] += vtime->utime + delta;
else
cpustat[CPUTIME_USER] += vtime->utime + delta;
} else {
- WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+ WARN_ON_ONCE(err != VTIME_GUEST);
if (task_nice(tsk) > 0) {
cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
cpustat[CPUTIME_NICE] += vtime->gtime + delta;

Or drop the warn.
-Chris