Re: [PATCH 8/9] psi: pressure stall information for CPU, memory, and IO
From: Peter Zijlstra
Date: Fri Sep 07 2018 - 06:17:07 EST
On Tue, Aug 28, 2018 at 01:22:57PM -0400, Johannes Weiner wrote:
> +enum psi_states {
> + PSI_IO_SOME,
> + PSI_IO_FULL,
> + PSI_MEM_SOME,
> + PSI_MEM_FULL,
> + PSI_CPU_SOME,
> + /* Only per-CPU, to weigh the CPU in the global average: */
> + PSI_NONIDLE,
> + NR_PSI_STATES,
> +};
> +static u32 get_recent_time(struct psi_group *group, int cpu,
> + enum psi_states state)
> +{
> + struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
> + unsigned int seq;
> + u32 time, delta;
> +
> + do {
> + seq = read_seqcount_begin(&groupc->seq);
> +
> + time = groupc->times[state];
> + /*
> + * In addition to already concluded states, we also
> + * incorporate currently active states on the CPU,
> + * since states may last for many sampling periods.
> + *
> + * This way we keep our delta sampling buckets small
> + * (u32) and our reported pressure close to what's
> + * actually happening.
> + */
> + if (test_state(groupc->tasks, state))
> + time += cpu_clock(cpu) - groupc->state_start;
> + } while (read_seqcount_retry(&groupc->seq, seq));
> +
> + delta = time - groupc->times_prev[state];
> + groupc->times_prev[state] = time;
> +
> + return delta;
> +}
> +static bool update_stats(struct psi_group *group)
> +{
> + u64 deltas[NR_PSI_STATES - 1] = { 0, };
> + unsigned long missed_periods = 0;
> + unsigned long nonidle_total = 0;
> + u64 now, expires, period;
> + int cpu;
> + int s;
> +
> + mutex_lock(&group->stat_lock);
> +
> + /*
> + * Collect the per-cpu time buckets and average them into a
> + * single time sample that is normalized to wallclock time.
> + *
> + * For averaging, each CPU is weighted by its non-idle time in
> + * the sampling period. This eliminates artifacts from uneven
> + * loading, or even entirely idle CPUs.
> + */
> + for_each_possible_cpu(cpu) {
> + u32 nonidle;
> +
> + nonidle = get_recent_time(group, cpu, PSI_NONIDLE);
> + nonidle = nsecs_to_jiffies(nonidle);
> + nonidle_total += nonidle;
> +
> + for (s = 0; s < PSI_NONIDLE; s++) {
> + u32 delta;
> +
> + delta = get_recent_time(group, cpu, s);
> + deltas[s] += (u64)delta * nonidle;
> + }
> + }
This does the whole seqcount thing 6x, which is a bit of a waste.
/*
 * By-value copy of one CPU's PSI state times, one slot per
 * enum psi_states value; filled in and returned by
 * get_times_snapshot().
 */
struct snapshot {
u32 times[NR_PSI_STATES];
};
static inline struct snapshot get_times_snapshot(struct psi_group *pg, int cpu)
{
struct pci_group_cpu *pgc = per_cpu_ptr(pg->pcpu, cpu);
struct snapshot s;
unsigned int seq;
u32 delta;
int i;
do {
seq = read_seqcount_begin(&pgc->seq);
delta = cpu_clock(cpu) - pgc->state_start;
for (i = 0; i < NR_PSI_STATES; i++) {
s.times[i] = gpc->times[i];
if (test_state(pgc->tasks, i))
s.times[i] += delta;
}
} while (read_seqcount_retry(&pgc->seq, seq);
return s;
}
/*
 * Aggregate the per-CPU state times, weighting each CPU by its
 * non-idle time, using one snapshot (one seqcount read section) per
 * CPU.
 *
 * NOTE(review): get_recent_time() in the quoted patch also subtracted
 * ->times_prev[] to turn the totals into per-period deltas; that step
 * is presumably part of the elided code below -- confirm.
 */
for_each_possible_cpu(cpu) {
	struct snapshot s = get_times_snapshot(pg, cpu);

	nonidle = nsecs_to_jiffies(s.times[PSI_NONIDLE]);
	nonidle_total += nonidle;

	/* Index by the state 'i'; 's' is the snapshot struct here. */
	for (i = 0; i < PSI_NONIDLE; i++)
		deltas[i] += (u64)s.times[i] * nonidle;

	/* ... */
}
It's a bit cumbersome, but that's because of C.