Re: [PATCH 1/4] numa: introduce per-cgroup numa balancing locality, statistic

From: Peter Zijlstra
Date: Thu Jul 11 2019 - 09:48:07 EST


On Wed, Jul 03, 2019 at 11:28:10AM +0800, 王贇 wrote:

> @@ -3562,10 +3563,53 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
> seq_putc(m, '\n');
> }
>
> +#ifdef CONFIG_NUMA_BALANCING
> + seq_puts(m, "locality");
> + for (nr = 0; nr < NR_NL_INTERVAL; nr++) {
> + int cpu;
> + u64 sum = 0;
> +
> + for_each_possible_cpu(cpu)
> + sum += per_cpu(memcg->stat_numa->locality[nr], cpu);
> +
> + seq_printf(m, " %u", jiffies_to_msecs(sum));
> + }
> + seq_putc(m, '\n');
> +#endif
> +
> return 0;
> }
> #endif /* CONFIG_NUMA */
>
> +#ifdef CONFIG_NUMA_BALANCING
> +
> +void memcg_stat_numa_update(struct task_struct *p)
> +{
> + struct mem_cgroup *memcg;
> + unsigned long remote = p->numa_faults_locality[3];
> + unsigned long local = p->numa_faults_locality[4];
> + unsigned long idx = -1;
> +
> + if (mem_cgroup_disabled())
> + return;
> +
> + if (remote || local) {
> + idx = ((local * 10) / (remote + local)) - 2;
> + /* 0~29% in one slot for cache align */
> + if (idx < PERCENT_0_29)
> + idx = PERCENT_0_29;
> + else if (idx >= NR_NL_INTERVAL)
> + idx = NR_NL_INTERVAL - 1;
> + }
> +
> + rcu_read_lock();
> + memcg = mem_cgroup_from_task(p);
> + if (idx != -1)
> + this_cpu_inc(memcg->stat_numa->locality[idx]);

I thought cgroups were supposed to be hierarchical. That is, if we have:

	     R
	    / \
	   A
	  /\
	    B
	     \
	      t1

Then our task t1 should be accounted to B (as you do), but also to A and
R.

> + rcu_read_unlock();
> +}
> +#endif