[RFC 17/60] sched: Introduce and use generic task group CFS traversal functions

From: Jan H. Schönherr
Date: Fri Sep 07 2018 - 17:47:56 EST


Task group management has to iterate over all CFS runqueues within the
task group. Currently, this uses for_each_possible_cpu() loops and
accesses tg->cfs_rq[] directly. This does not adapt well to the
upcoming addition of coscheduling, which will introduce additional CFS
runqueues.

Introduce more general traversal loop constructs, which will extend
nicely to coscheduling. Rewrite task group management functions to
make use of these new loop constructs. Except for
alloc_fair_sched_group(), the changes are mostly cosmetic;
alloc_fair_sched_group() now iterates over the parent group's CFS
runqueues to set up those of the new group.

Signed-off-by: Jan H. Schönherr <jschoenh@xxxxxxxxx>
---
kernel/sched/fair.c | 64 +++++++++++++++++++++++-----------------------------
kernel/sched/sched.h | 31 +++++++++++++++++++++++++
2 files changed, 59 insertions(+), 36 deletions(-)
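
Note (not part of the patch): below is a minimal, hypothetical sketch of
how the new traversal helper is meant to be used in place of the old
for_each_possible_cpu() pattern. tg_count_cfsrqs() is an invented example
caller for illustration only; it is not added by this series.

	static unsigned int tg_count_cfsrqs(struct task_group *tg)
	{
		struct cfs_rq *cfs;
		unsigned int n = 0;

		/*
		 * Walk only the CFS runqueues that belong to @tg;
		 * the loop runs zero times if tg->cfs_rq has not
		 * been allocated yet.
		 */
		taskgroup_for_each_cfsrq(tg, cfs)
			n++;

		return n;
	}

The _safe variant caches the next runqueue before the loop body runs, so
the body may free the current one, as free_fair_sched_group() does below.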

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 82cdd75e88b9..9f63ac37f5ef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9805,16 +9805,13 @@ static void task_change_group_fair(struct task_struct *p, int type)

void free_fair_sched_group(struct task_group *tg)
{
- int i;
+ struct cfs_rq *cfs, *ncfs;

destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));

- if (!tg->cfs_rq)
- return;
-
- for_each_possible_cpu(i) {
- kfree(tg->cfs_rq[i]->my_se);
- kfree(tg->cfs_rq[i]);
+ taskgroup_for_each_cfsrq_safe(tg, cfs, ncfs) {
+ kfree(cfs->my_se);
+ kfree(cfs);
}

kfree(tg->cfs_rq);
@@ -9823,8 +9820,7 @@ void free_fair_sched_group(struct task_group *tg)
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct sched_entity *se;
- struct cfs_rq *cfs_rq;
- int i;
+ struct cfs_rq *cfs_rq, *pcfs_rq;

tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL);
if (!tg->cfs_rq)
@@ -9834,26 +9830,25 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)

init_cfs_bandwidth(tg_cfs_bandwidth(tg));

- for_each_possible_cpu(i) {
- cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
- GFP_KERNEL, cpu_to_node(i));
- if (!cfs_rq)
- goto err;
+ taskgroup_for_each_cfsrq(parent, pcfs_rq) {
+ struct rq *rq = rq_of(pcfs_rq);
+ int node = cpu_to_node(cpu_of(rq));

- se = kzalloc_node(sizeof(struct sched_entity),
- GFP_KERNEL, cpu_to_node(i));
- if (!se)
- goto err_free_rq;
+ cfs_rq = kzalloc_node(sizeof(*cfs_rq), GFP_KERNEL, node);
+ se = kzalloc_node(sizeof(*se), GFP_KERNEL, node);
+ if (!cfs_rq || !se)
+ goto err_free;

- tg->cfs_rq[i] = cfs_rq;
+ tg->cfs_rq[cpu_of(rq)] = cfs_rq;
init_cfs_rq(cfs_rq);
- init_tg_cfs_entry(tg, cfs_rq, se, cpu_rq(i), parent->cfs_rq[i]);
+ init_tg_cfs_entry(tg, cfs_rq, se, rq, pcfs_rq);
}

return 1;

-err_free_rq:
+err_free:
kfree(cfs_rq);
+ kfree(se);
err:
return 0;
}
@@ -9861,17 +9856,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
void online_fair_sched_group(struct task_group *tg)
{
struct sched_entity *se;
+ struct cfs_rq *cfs;
struct rq *rq;
- int i;

- for_each_possible_cpu(i) {
- rq = cpu_rq(i);
- se = tg->cfs_rq[i]->my_se;
+ taskgroup_for_each_cfsrq(tg, cfs) {
+ rq = rq_of(cfs);
+ se = cfs->my_se;

raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
attach_entity_cfs_rq(se);
- sync_throttle(tg->cfs_rq[i]);
+ sync_throttle(cfs);
raw_spin_unlock_irq(&rq->lock);
}
}
@@ -9879,24 +9874,21 @@ void online_fair_sched_group(struct task_group *tg)
void unregister_fair_sched_group(struct task_group *tg)
{
unsigned long flags;
- struct rq *rq;
- int cpu;
+ struct cfs_rq *cfs;

- for_each_possible_cpu(cpu) {
- remove_entity_load_avg(tg->cfs_rq[cpu]->my_se);
+ taskgroup_for_each_cfsrq(tg, cfs) {
+ remove_entity_load_avg(cfs->my_se);

/*
* Only empty task groups can be destroyed; so we can speculatively
* check on_list without danger of it being re-added.
*/
- if (!tg->cfs_rq[cpu]->on_list)
+ if (!cfs->on_list)
continue;

- rq = cpu_rq(cpu);
-
- raw_spin_lock_irqsave(&rq->lock, flags);
- list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_lock_irqsave(&rq_of(cfs)->lock, flags);
+ list_del_leaf_cfs_rq(cfs);
+ raw_spin_unlock_irqrestore(&rq_of(cfs)->lock, flags);
}
}

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cd3a32ce8fc6..9ecbb57049a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -946,6 +946,37 @@ static inline int cpu_of(struct rq *rq)
#endif
}

+#ifdef CONFIG_FAIR_GROUP_SCHED
+#define taskgroup_for_each_cfsrq(tg, cfs) \
+ for ((cfs) = taskgroup_first_cfsrq(tg); (cfs); \
+ (cfs) = taskgroup_next_cfsrq(tg, cfs))
+
+#define taskgroup_for_each_cfsrq_safe(tg, cfs, ncfs) \
+ for ((cfs) = taskgroup_first_cfsrq(tg), \
+ (ncfs) = (cfs) ? taskgroup_next_cfsrq(tg, cfs) : NULL; \
+ (cfs); \
+ (cfs) = (ncfs), \
+ (ncfs) = (cfs) ? taskgroup_next_cfsrq(tg, cfs) : NULL)
+
+static inline struct cfs_rq *taskgroup_first_cfsrq(struct task_group *tg)
+{
+ int cpu = cpumask_first(cpu_possible_mask);
+
+ if (!tg->cfs_rq)
+ return NULL;
+ return tg->cfs_rq[cpu];
+}
+
+static inline struct cfs_rq *taskgroup_next_cfsrq(struct task_group *tg,
+ struct cfs_rq *cfs)
+{
+ int cpu = cpumask_next(cpu_of(cfs->rq), cpu_possible_mask);
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+ return tg->cfs_rq[cpu];
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */

#ifdef CONFIG_SCHED_SMT

--
2.9.3.1.gcba166c.dirty