[RFC 57/60] cosched: Add sysfs interface to configure coscheduling on cgroups

From: Jan H. Schönherr
Date: Fri Sep 07 2018 - 17:43:15 EST


Add the sysfs interface to configure the scheduling domain hierarchy
level at which coscheduling should happen for a cgroup. By default,
task groups are created with a value of zero, which corresponds to a
regular task group without any coscheduling.

Note that you cannot specify a value that goes beyond that of the
root task group. The value of the root task group itself cannot be
configured via this interface; it has to be configured with a command
line argument, which will be added later.
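
For illustration, a minimal usage sketch on the legacy "cpu" hierarchy;
the mount point, group name, and level below are only examples and not
part of this patch:

    mkdir /sys/fs/cgroup/cpu/grp0
    echo 1 > /sys/fs/cgroup/cpu/grp0/cpu.scheduled   # co-schedule grp0 at SD level 1
    cat /sys/fs/cgroup/cpu/grp0/cpu.scheduled        # read back the configured level

Writing a value larger than that of the root task group fails with
-EINVAL, and writing to the root task group's own file fails with
-EACCES.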

The function sdrq_update_root() will be filled in a follow-up commit.

Signed-off-by: Jan H. Schönherr <jschoenh@xxxxxxxxx>
---
kernel/sched/core.c | 44 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/cosched.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 ++++
3 files changed, 101 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75de3b83a8c6..ad2ff9bc535c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6336,6 +6336,9 @@ void sched_offline_group(struct task_group *tg)
{
unsigned long flags;

+ /* Don't let offlining/destruction worry about coscheduling aspects */
+ cosched_set_scheduled(tg, 0);
+
/* End participation in shares distribution: */
unregister_fair_sched_group(tg);

@@ -6529,7 +6532,33 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,

return (u64) scale_load_down(tg->shares);
}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_COSCHEDULING
+static int cpu_scheduled_write_u64(struct cgroup_subsys_state *css, struct cftype *cftype,
+ u64 val)
+{
+ struct task_group *tg = css_tg(css);
+
+ if (tg == &root_task_group)
+ return -EACCES;
+
+ if (val > root_task_group.scheduled)
+ return -EINVAL;
+
+ cosched_set_scheduled(tg, val);
+ return 0;
+}

+static u64 cpu_scheduled_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ struct task_group *tg = css_tg(css);
+
+ return cosched_get_scheduled(tg);
+}
+#endif /* CONFIG_COSCHEDULING */
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_CFS_BANDWIDTH
static DEFINE_MUTEX(cfs_constraints_mutex);

@@ -6825,6 +6854,13 @@ static struct cftype cpu_legacy_files[] = {
.write_u64 = cpu_shares_write_u64,
},
#endif
+#ifdef CONFIG_COSCHEDULING
+ {
+ .name = "scheduled",
+ .read_u64 = cpu_scheduled_read_u64,
+ .write_u64 = cpu_scheduled_write_u64,
+ },
+#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
.name = "cfs_quota_us",
@@ -7012,6 +7048,14 @@ static struct cftype cpu_files[] = {
.write_s64 = cpu_weight_nice_write_s64,
},
#endif
+#ifdef CONFIG_COSCHEDULING
+ /* FIXME: This does not conform to cgroup-v2 conventions. */
+ {
+ .name = "scheduled",
+ .read_u64 = cpu_scheduled_read_u64,
+ .write_u64 = cpu_scheduled_write_u64,
+ },
+#endif
#ifdef CONFIG_CFS_BANDWIDTH
{
.name = "max",
diff --git a/kernel/sched/cosched.c b/kernel/sched/cosched.c
index f2d51079b3db..7c8b8c8d2814 100644
--- a/kernel/sched/cosched.c
+++ b/kernel/sched/cosched.c
@@ -515,6 +515,59 @@ void cosched_offline_group(struct task_group *tg)
list_del_rcu(&cfs->sdrq.tg_siblings);
}

+static void sdrq_update_root(struct sdrq *sdrq)
+{
+ /* TBD */
+}
+
+void cosched_set_scheduled(struct task_group *tg, int level)
+{
+ struct cfs_rq *cfs_rq;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&tg->lock, flags);
+
+ /*
+ * Update the is_root fields of all hierarchical CFS runqueues in this
+ * task group. To avoid repetitive enqueues and dequeues on every level
+ * change, we choose pre- or post-order traversal.
+ */
+ if (level > tg->scheduled) {
+ /*
+ * roots move upwards: start reconfiguration at the top, so
+ * that everything is dequeued/enqueued only when we reach
+ * the previous scheduling level.
+ */
+ tg->scheduled = level;
+ taskgroup_for_each_cfsrq_topdown(tg, cfs_rq)
+ sdrq_update_root(&cfs_rq->sdrq);
+ }
+ if (level < tg->scheduled) {
+ /*
+ * roots move downwards: start reconfiguration at the bottom,
+ * so that we do the dequeuing/enqueuing immediately when we
+ * reach the new scheduling level.
+ */
+ tg->scheduled = level;
+ taskgroup_for_each_cfsrq(tg, cfs_rq)
+ sdrq_update_root(&cfs_rq->sdrq);
+ }
+
+ raw_spin_unlock_irqrestore(&tg->lock, flags);
+}
+
+int cosched_get_scheduled(struct task_group *tg)
+{
+ unsigned long flags;
+ int level;
+
+ raw_spin_lock_irqsave(&tg->lock, flags);
+ level = tg->scheduled;
+ raw_spin_unlock_irqrestore(&tg->lock, flags);
+
+ return level;
+}
+
/*****************************************************************************
* Locking related functions
*****************************************************************************/
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f6146feb7e55..e257451e05a5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1212,6 +1212,8 @@ void cosched_init_sdrq(struct task_group *tg, struct cfs_rq *cfs,
struct cfs_rq *sd_parent, struct cfs_rq *tg_parent);
void cosched_online_group(struct task_group *tg);
void cosched_offline_group(struct task_group *tg);
+void cosched_set_scheduled(struct task_group *tg, int level);
+int cosched_get_scheduled(struct task_group *tg);
struct rq *rq_lock_owned(struct rq *rq, struct rq_owner_flags *orf);
void rq_unlock_owned(struct rq *rq, struct rq_owner_flags *orf);
void rq_chain_init(struct rq_chain *rc, struct rq *rq);
@@ -1226,6 +1228,8 @@ static inline void cosched_init_sdrq(struct task_group *tg, struct cfs_rq *cfs,
struct cfs_rq *tg_parent) { }
static inline void cosched_online_group(struct task_group *tg) { }
static inline void cosched_offline_group(struct task_group *tg) { }
+static inline void cosched_set_scheduled(struct task_group *tg, int level) { }
+static inline int cosched_get_scheduled(struct task_group *tg) { return 0; }
static inline struct rq *rq_lock_owned(struct rq *rq, struct rq_owner_flags *orf) { return rq; }
static inline void rq_unlock_owned(struct rq *rq, struct rq_owner_flags *orf) { }
static inline void rq_chain_init(struct rq_chain *rc, struct rq *rq) { }
--
2.9.3.1.gcba166c.dirty