[RFC PATCH 4/4] sched: Implement set_gang_buddy

From: Nikunj A. Dadhania
Date: Mon Dec 19 2011 - 03:34:04 EST


set_next_buddy() does not guarantee that the gang task is picked next,
because the next buddy is only chosen if it passes the wakeup preemption
check. This sometimes hurts gang scheduling. Introduce a set_gang_buddy()
API so that the gang task is picked up unconditionally.

Signed-off-by: Nikunj A. Dadhania <nikunj@xxxxxxxxxxxxxxxxxx>
---

kernel/sched/fair.c | 31 ++++++++++++++++++++++++++++++-
kernel/sched/sched.h | 2 +-
2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9a2f291..38f97b6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1165,6 +1165,17 @@ static void __clear_buddies_skip(struct sched_entity *se)
}
}

+static void __clear_buddies_gang(struct sched_entity *se)
+{
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->gang == se)
+ cfs_rq->gang = NULL;
+ else
+ break;
+ }
+}
+
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->last == se)
@@ -1175,6 +1186,9 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)

if (cfs_rq->skip == se)
__clear_buddies_skip(se);
+
+ if (cfs_rq->gang == se)
+ __clear_buddies_gang(se);
}

static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -1331,6 +1345,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
se = cfs_rq->next;

+ /*
+ * Gang buddy: let's be unfair here and pick it unconditionally
+ */
+ if (cfs_rq->gang)
+ se = cfs_rq->gang;
+
clear_buddies(cfs_rq, se);

return se;
@@ -2929,6 +2949,15 @@ static void set_skip_buddy(struct sched_entity *se)
cfs_rq_of(se)->skip = se;
}

+static void set_gang_buddy(struct sched_entity *se)
+{
+ if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+ return;
+
+ for_each_sched_entity(se)
+ cfs_rq_of(se)->gang = se;
+}
+
/*
* Preempt the current task with a newly woken task if needed:
*/
@@ -5521,7 +5550,7 @@ static void gang_sched_member(void *info)
struct sched_entity *se = tg->se[cpu];

/* Make the parent favourable */
- set_next_buddy(se);
+ set_gang_buddy(se);
set_tsk_need_resched(current);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index db8369f..a96731f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -226,7 +226,7 @@ struct cfs_rq {
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
- struct sched_entity *curr, *next, *last, *skip;
+ struct sched_entity *curr, *next, *last, *skip, *gang;

#ifdef CONFIG_SCHED_DEBUG
unsigned int nr_spread_over;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/