[PATCH 3/3] sched/numa: Fix numabalancing stats in /proc/pid/sched

From: Srikar Dronamraju
Date: Thu Jun 25 2015 - 13:22:24 EST


Commit 44dba3d5d6a1 (sched: Refactor task_struct to use numa_faults
instead of numa_* pointers) modified the way tsk->numa_faults stats are
accounted. However that commit never touched show_numa_stats() that
displays /proc/pid/sched. Now the numbers displayed in /proc/pid/sched
dont match the actual numbers.

Fix it by making sure that /proc/pid/sched reflects the task fault numbers
Also add group fault stats too.

Also couple of more modifications are added here.
1. Format change.
- Previously we would list two entries per node, one for private and one for
shared. Also the home node info was listed in each entry.

- Now preferred node, total_faults and current node are displayed separately.
- Now there is one entry per node, that lists private,shared task and group
faults.

2. p->numa_pages_migrated was getting reset after every read of
/proc/pid/sched. Its more useful to have absolute numbers since
differential migrations between two accesses can be easily calculated.

Signed-off-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
---
kernel/sched/debug.c | 38 +++++++++++++++++---------------------
kernel/sched/fair.c | 22 +++++++++++++++++++++-
kernel/sched/sched.h | 10 +++++++++-
3 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index f1dcd1d..8b974ed 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs);
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))


+#ifdef CONFIG_NUMA_BALANCING
+void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+ unsigned long tpf, unsigned long gsf, unsigned long gpf)
+{
+ SEQ_printf(m, "numa_faults node=%d ", node);
+ SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf);
+ SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf);
+}
+#endif
+
+
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
struct mempolicy *pol;
- int node, i;

if (p->mm)
P(mm->numa_scan_seq);
@@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
mpol_get(pol);
task_unlock(p);

- SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));
-
- for_each_online_node(node) {
- for (i = 0; i < 2; i++) {
- unsigned long nr_faults = -1;
- int cpu_current, home_node;
-
- if (p->numa_faults)
- nr_faults = p->numa_faults[2*node + i];
-
- cpu_current = !i ? (task_node(p) == node) :
- (pol && node_isset(node, pol->v.nodes));
-
- home_node = (p->numa_preferred_nid == node);
-
- SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
- i, node, cpu_current, home_node, nr_faults);
- }
- }
-
+ P(numa_pages_migrated);
+ P(numa_preferred_nid);
+ P(total_numa_faults);
+ SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
+ task_node(p), task_numa_group_id(p));
+ show_numa_stats(p, m);
mpol_put(pol);
#endif
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3d57cc0..e31ece1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8473,7 +8473,27 @@ void print_cfs_stats(struct seq_file *m, int cpu)
print_cfs_rq(m, cpu, cfs_rq);
rcu_read_unlock();
}
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+void show_numa_stats(struct task_struct *p, struct seq_file *m)
+{
+ int node;
+ unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
+
+ for_each_online_node(node) {
+ if (p->numa_faults) {
+ tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
+ tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
+ }
+ if (p->numa_group) {
+ gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
+ gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
+ }
+ print_numa_stats(m, node, tsf, tpf, gsf, gpf);
+ }
+}
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */

__init void init_sched_fair_class(void)
{
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c901c9c..84d4879 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1696,7 +1696,15 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void
print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void
+show_numa_stats(struct task_struct *p, struct seq_file *m);
+extern void
+print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+ unsigned long tpf, unsigned long gsf, unsigned long gpf);
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */

extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/