[PATCH 2/4] sched: minor fixes for group scheduler

From: Srivatsa Vaddagiri
Date: Sun Nov 25 2007 - 23:51:09 EST



Minor bug fixes for group scheduler:

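- Use a single global mutex (task_group_mutex) to serialize add/remove of
task groups and changes to a task group's shares. This replaces the
per-group spinlock (task_group->lock), which is removed. The same mutex is
also held when printing cfs_rq stats for the various task groups.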
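- Use list_for_each_entry_rcu() in the for_each_leaf_cfs_rq() macro (when
walking the task group list), since entries on that list are added and
removed with list_add_rcu()/list_del_rcu().
- In sched_create_group(), initialize tg->shares before the group's cfs_rqs
are added to the per-cpu leaf_cfs_rq lists.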


Signed-off-by: Srivatsa Vaddagiri <vatsa@xxxxxxxxxxxxxxxxxx>

---
kernel/sched.c | 33 +++++++++++++++++++++++++--------
kernel/sched_fair.c | 4 +++-
2 files changed, 28 insertions(+), 9 deletions(-)

Index: current/kernel/sched.c
===================================================================
--- current.orig/kernel/sched.c
+++ current/kernel/sched.c
@@ -169,8 +169,6 @@ struct task_group {
/* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq;
unsigned long shares;
- /* spinlock to serialize modification to shares */
- spinlock_t lock;
struct rcu_head rcu;
};

@@ -182,6 +180,11 @@ static DEFINE_PER_CPU(struct cfs_rq, ini
static struct sched_entity *init_sched_entity_p[NR_CPUS];
static struct cfs_rq *init_cfs_rq_p[NR_CPUS];

+/* task_group_mutex serializes add/remove of task groups, changes to a task
+ * group's cpu shares, and the walk of task groups when printing cfs_rq stats.
+ */
+static DEFINE_MUTEX(task_group_mutex);
+
/* Default task group.
* Every task in system belong to this group at bootup.
*/
@@ -222,9 +225,21 @@ static inline void set_task_cfs_rq(struc
p->se.parent = task_group(p)->se[cpu];
}

+static inline void lock_task_group_list(void)
+{
+ mutex_lock(&task_group_mutex);
+}
+
+static inline void unlock_task_group_list(void)
+{
+ mutex_unlock(&task_group_mutex);
+}
+
#else

static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
+static inline void lock_task_group_list(void) { }
+static inline void unlock_task_group_list(void) { }

#endif /* CONFIG_FAIR_GROUP_SCHED */

@@ -6747,7 +6762,6 @@ void __init sched_init(void)
se->parent = NULL;
}
init_task_group.shares = init_task_group_load;
- spin_lock_init(&init_task_group.lock);
#endif

for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6987,14 +7001,15 @@ struct task_group *sched_create_group(vo
se->parent = NULL;
}

+ tg->shares = NICE_0_LOAD;
+
+ lock_task_group_list();
for_each_possible_cpu(i) {
rq = cpu_rq(i);
cfs_rq = tg->cfs_rq[i];
list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
}
-
- tg->shares = NICE_0_LOAD;
- spin_lock_init(&tg->lock);
+ unlock_task_group_list();

return tg;

@@ -7040,10 +7055,12 @@ void sched_destroy_group(struct task_gro
struct cfs_rq *cfs_rq = NULL;
int i;

+ lock_task_group_list();
for_each_possible_cpu(i) {
cfs_rq = tg->cfs_rq[i];
list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
}
+ unlock_task_group_list();

BUG_ON(!cfs_rq);

@@ -7117,7 +7134,7 @@ int sched_group_set_shares(struct task_g
{
int i;

- spin_lock(&tg->lock);
+ lock_task_group_list();
if (tg->shares == shares)
goto done;

@@ -7126,7 +7143,7 @@ int sched_group_set_shares(struct task_g
set_se_shares(tg->se[i], shares);

done:
- spin_unlock(&tg->lock);
+ unlock_task_group_list();
return 0;
}

Index: current/kernel/sched_fair.c
===================================================================
--- current.orig/kernel/sched_fair.c
+++ current/kernel/sched_fair.c
@@ -685,7 +685,7 @@ static inline struct cfs_rq *cpu_cfs_rq(

/* Iterate thr' all leaf cfs_rq's on a runqueue */
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
- list_for_each_entry(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+ list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)

/* Do the two (enqueued) entities belong to the same group ? */
static inline int
@@ -1126,7 +1126,9 @@ static void print_cfs_stats(struct seq_f
#ifdef CONFIG_FAIR_GROUP_SCHED
print_cfs_rq(m, cpu, &cpu_rq(cpu)->cfs);
#endif
+ lock_task_group_list();
for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
print_cfs_rq(m, cpu, cfs_rq);
+ unlock_task_group_list();
}
#endif

--
Regards,
vatsa
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/