[PATCH 02/14] resource limits: aggregate task highwater marks to cgroup level

From: Topi Miettinen
Date: Fri Jul 15 2016 - 06:37:27 EST


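Aggregate the resource usage highwater marks of tasks into per-cgroup
statistics. The marks of the current task are folded into its cgroup on
the default hierarchy from the task migration path, and the marks of all
member tasks are folded in whenever cgroupstats are built. The result is
exported in struct cgroupstats and printed by getdelays when resource
accounting output is enabled.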

Signed-off-by: Topi Miettinen <toiwoton@xxxxxxxxx>
---
Documentation/accounting/getdelays.c | 10 ++++++-
include/linux/cgroup-defs.h | 5 ++++
include/uapi/linux/cgroupstats.h | 3 ++
kernel/cgroup.c | 55 ++++++++++++++++++++++++++++++++++++
4 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 489f1b7..7c86279 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -27,7 +27,7 @@

#include <linux/genetlink.h>
#include "include/uapi/linux/taskstats.h"
-#include <linux/cgroupstats.h>
+#include "include/uapi/linux/cgroupstats.h"

/*
* Generic macros for dealing with netlink sockets. Might be duplicated
@@ -258,12 +258,20 @@ static const char *const rlimit_names[] = {

static void print_cgroupstats(struct cgroupstats *c)
{
+ int i;
+
printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
"uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
(unsigned long long)c->nr_io_wait,
(unsigned long long)c->nr_running,
(unsigned long long)c->nr_stopped,
(unsigned long long)c->nr_uninterruptible);
+ /* With print_resource_accounting set, also dump every highwater mark as "name=value". */
+ if (print_resource_accounting)
+ for (i = 0; i < RLIM_NLIMITS; i++)
+ printf("%s=%llu\n",
+ rlimit_names[i],
+ (unsigned long long)c->resource_hiwater[i]);
}


diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5b17de6..86bbc08 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
+#include <linux/cgroupstats.h>

#ifdef CONFIG_CGROUPS

@@ -300,6 +301,10 @@ struct cgroup {
/* used to schedule release agent */
struct work_struct release_agent_work;

+	/* Task rlimit highwater marks aggregated into this cgroup.  Not gated
+	 * by CONFIG_TASK_XACCT: kernel/cgroup.c uses it in every config. */
+	struct cgroupstats stats;
+
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
diff --git a/include/uapi/linux/cgroupstats.h b/include/uapi/linux/cgroupstats.h
index 3753c33..18b5b11 100644
--- a/include/uapi/linux/cgroupstats.h
+++ b/include/uapi/linux/cgroupstats.h
@@ -35,6 +35,9 @@ struct cgroupstats {
__u64 nr_uninterruptible; /* Number of tasks in uninterruptible */
/* state */
__u64 nr_io_wait; /* Number of tasks waiting on IO */
+ __u64 resource_hiwater[RLIM_NLIMITS]; /* high-watermark of
+ RLIMIT
+ resources */
};

/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 75c0ff0..9b2d805 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -247,6 +247,7 @@ static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
+static void cgroup_update_stats(void);

/**
* cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
@@ -2609,6 +2610,8 @@ out_release_tset:
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
+ cgroup_update_stats();
+
spin_unlock_irq(&css_set_lock);
return ret;
}
@@ -4657,6 +4660,53 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
return 0;
}

+/*
+ * Raise @cgrp's rlimit highwater marks to those in @tsk->signal where the task's are larger; callers also pass live tasks.
+ */
+static void cgroup_update_stats_from_task(struct cgroup *cgrp,
+ struct task_struct *tsk)
+{
+ struct signal_struct *sig = tsk->signal;
+ int i;
+ unsigned int seq, nextseq;
+ unsigned long flags;
+
+ rcu_read_lock();
+ /* Start with seq 0 so that the first pass over sig's marks is a lockless read. */
+ nextseq = 0;
+ do {
+ seq = nextseq;
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+ for (i = 0; i < RLIM_NLIMITS; i++)
+ if (cgrp->stats.resource_hiwater[i] <
+ sig->resource_highwatermark[i])
+ cgrp->stats.resource_hiwater[i] =
+ sig->resource_highwatermark[i];
+ /* NOTE(review): maxima are stored before the pass is validated; confirm a retry cannot leave a bad value. */
+ /* Should need_seqretry() reject this pass, repeat it once more with the lock held. */
+ nextseq = 1;
+ } while (need_seqretry(&sig->stats_lock, seq));
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+ rcu_read_unlock();
+}
+
+/*
+ * Fold current's rlimit highwater marks into its default-hierarchy cgroup.
+ * NOTE(review): accounts current, not the tasks being migrated - confirm.
+ */
+static void cgroup_update_stats(void)
+{
+	struct cgroup *cgrp;
+
+	/* Only the default hierarchy is accounted; no need to walk all roots. */
+	if (!cgrp_dfl_visible)
+		return;
+
+	cgrp = task_cgroup_from_root(current, &cgrp_dfl_root);
+	if (cgroup_on_dfl(cgrp))
+		cgroup_update_stats_from_task(cgrp, current);
+}
+
/**
* cgroupstats_build - build and fill cgroupstats
* @stats: cgroupstats to fill information into
@@ -4672,6 +4722,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
struct cgroup *cgrp;
struct css_task_iter it;
struct task_struct *tsk;
+ int i;

/* it should be kernfs_node belonging to cgroupfs and is a directory */
if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
@@ -4714,9 +4765,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
stats->nr_io_wait++;
break;
}
+ cgroup_update_stats_from_task(cgrp, tsk);
}
css_task_iter_end(&it);

+ for (i = 0; i < RLIM_NLIMITS; i++)
+ stats->resource_hiwater[i] = cgrp->stats.resource_hiwater[i];
+
mutex_unlock(&cgroup_mutex);
return 0;
}
--
2.8.1