Re: [RFC -v3 PATCH 2/3] sched: add yield_to function

From: Mike Galbraith
Date: Fri Jan 07 2011 - 00:29:27 EST

Next message: Alexey Dobriyan: "Re: [patch] vga_switcheroo: comparing too few characters in strncmp()"
Previous message: Yuehai Xu: "Re: Who does determine the number of requests that can be serving simultaneously in a storage?"
In reply to: Mike Galbraith: "Re: [RFC -v3 PATCH 2/3] sched: add yield_to function"
Next in thread: Hillf Danton: "Re: [RFC -v3 PATCH 2/3] sched: add yield_to function"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Wed, 2011-01-05 at 18:04 +0100, Peter Zijlstra wrote:
> On Wed, 2011-01-05 at 17:57 +0100, Mike Galbraith wrote:
> > + p_cfs_rq = cfs_rq_of(pse);
> > + local = 1;
> > + }
> > +#endif
> > +
> > + /* Tell the scheduler that we'd really like pse to run next. */
> > + p_cfs_rq->next = pse;
> > +
> > + /* We know whether we want to preempt or not, but are we allowed? */
> > + preempt &= same_thread_group(p, task_of(p_cfs_rq->curr));
> > +
> > + if (local)
> > + clear_buddies(cfs_rq, se);
>
> You might want to clear before setting next :-)

Or better, just remove dept. of redundancy dept. cruft. We clear
buddies upon selection. It's also pointless worrying whether to set
TIF_RESCHED or not, no cycle savings to be had there methinks.

While performing cruftectomy, also did cosmetic int ==> bool.

sched: Add yield_to(task, preempt) functionality.

Currently only implemented for fair class tasks.

Add a yield_to_task() method to the fair scheduling class, allowing the
caller of yield_to() to accelerate another thread in its thread group,
task group, and sched class toward either its cpu, or potentially the
caller's own cpu if the 'preempt' argument is also passed.

Implemented via a scheduler hint, using cfs_rq->next to encourage the
target being selected.

Signed-off-by: Rik van Riel <riel@xxxxxxxxxx>
Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
Signed-off-by: Mike Galbraith <efault@xxxxxx>

---
include/linux/sched.h | 1
kernel/sched.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched_fair.c | 44 +++++++++++++++++++++++++++++++++++++++
3 files changed, 101 insertions(+)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1056,6 +1056,7 @@ struct sched_class {
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
+ bool (*yield_to_task) (struct task_struct *p, bool preempt);

void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -5327,6 +5327,62 @@ void __sched yield(void)
}
EXPORT_SYMBOL(yield);

+/**
+ * yield_to - yield the current processor to another thread in your
+ * thread group, or accelerate that thread toward the processor it's on.
+ * @p: target task.
+ * @preempt: passed through to the class's yield_to_task() method.
+ *
+ * The caller must ensure that @p cannot go away before the checks are done.
+ */
+void __sched yield_to(struct task_struct *p, bool preempt)
+{
+ struct task_struct *curr = current;
+ struct rq *rq, *p_rq;
+ unsigned long flags;
+ bool yield = 0;
+
+ local_irq_save(flags);
+ rq = this_rq();
+
+again:
+ p_rq = task_rq(p);
+ double_rq_lock(rq, p_rq);
+ while (task_rq(p) != p_rq) { /* p's rq changed before we locked: retry */
+ double_rq_unlock(rq, p_rq);
+ goto again;
+ }
+
+ if (!curr->sched_class->yield_to_task)
+ goto out;
+
+ if (curr->sched_class != p->sched_class)
+ goto out;
+
+ if (task_running(p_rq, p) || p->state)
+ goto out;
+
+ if (!same_thread_group(p, curr))
+ goto out;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (task_group(p) != task_group(curr))
+ goto out;
+#endif
+
+ yield = curr->sched_class->yield_to_task(p, preempt);
+
+out:
+ double_rq_unlock(rq, p_rq);
+ local_irq_restore(flags);
+
+ if (yield) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ }
+}
+EXPORT_SYMBOL_GPL(yield_to);
+
/*
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
* that process accounting knows that this is a task in IO wait state.
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -1337,6 +1337,49 @@ static void yield_task_fair(struct rq *r
}

#ifdef CONFIG_SMP
+static void pull_task(struct rq *src_rq, struct task_struct *p,
+ struct rq *this_rq, int this_cpu);
+#endif
+
+static bool yield_to_task_fair(struct task_struct *p, bool preempt)
+{
+ struct sched_entity *se = &current->se;
+ struct sched_entity *pse = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ struct cfs_rq *p_cfs_rq = cfs_rq_of(pse);
+ int this_cpu = smp_processor_id();
+
+ if (!pse->on_rq)
+ return false;
+
+#ifdef CONFIG_SMP
+ /*
+ * If this yield is important enough to want to preempt instead
+ * of only dropping a ->next hint, we're alone, and the target
+ * is not alone, pull the target to this cpu.
+ *
+ * NOTE: the target may be alone in its cfs_rq if another class
+ * task or another task group is currently executing on its cpu.
+ * In this case, we still pull, to accelerate it toward the cpu.
+ */
+ if (cfs_rq != p_cfs_rq && preempt && cfs_rq->nr_running == 1 &&
+ cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
+ pull_task(task_rq(p), p, this_rq(), this_cpu);
+ p_cfs_rq = cfs_rq_of(pse);
+ }
+#endif
+
+ /* Tell the scheduler that we'd really like pse to run next. */
+ p_cfs_rq->next = pse;
+
+ /* Are we allowed to preempt? NOTE(review): p_cfs_rq->curr NULL-safe? */
+ if (preempt && same_thread_group(p, task_of(p_cfs_rq->curr)))
+ resched_task(task_of(p_cfs_rq->curr));
+
+ return cfs_rq == p_cfs_rq; /* true only if p is on our own cfs_rq */
+}
+
+#ifdef CONFIG_SMP

static void task_waking_fair(struct rq *rq, struct task_struct *p)
{
@@ -4143,6 +4186,7 @@ static const struct sched_class fair_sch
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
+ .yield_to_task = yield_to_task_fair,

.check_preempt_curr = check_preempt_wakeup,

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Alexey Dobriyan: "Re: [patch] vga_switcheroo: comparing too few characters in strncmp()"
Previous message: Yuehai Xu: "Re: Who does determine the number of requests that can be serving simultaneously in a storage?"
In reply to: Mike Galbraith: "Re: [RFC -v3 PATCH 2/3] sched: add yield_to function"
Next in thread: Hillf Danton: "Re: [RFC -v3 PATCH 2/3] sched: add yield_to function"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]