Re: [PATCH v3] sched/deadline: Fix the intention to re-evalute tick dependency for offline cpu

From: Wanpeng Li
Date: Wed Aug 31 2016 - 06:31:25 EST


2016-08-31 17:21 GMT+08:00 Peter Zijlstra <peterz@xxxxxxxxxxxxx>:
> On Fri, Aug 19, 2016 at 09:56:59PM +0800, Wanpeng Li wrote:
>> 2016-08-19 21:25 GMT+08:00 Peter Zijlstra <peterz@xxxxxxxxxxxxx>:
>> > On Fri, Aug 12, 2016 at 05:24:03PM +0800, Wanpeng Li wrote:
>> >
>> >> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> >> index d091f4a..ce0fb00 100644
>> >> --- a/kernel/sched/deadline.c
>> >> +++ b/kernel/sched/deadline.c
>> >> @@ -641,6 +641,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>> >> goto unlock;
>> >> }
>> >>
>> >> +#ifdef CONFIG_SMP
>> >> + if (unlikely(!rq->online))
>> >> + goto offline;
>> >> +#endif
>> >> +
>> >> enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
>> >> if (dl_task(rq->curr))
>> >> check_preempt_curr_dl(rq, p, 0);
>> >> @@ -648,6 +653,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>> >> resched_curr(rq);
>> >>
>> >> #ifdef CONFIG_SMP
>> >> +offline:
>> >> /*
>> >> * Perform balancing operations here; after the replenishments. We
>> >> * cannot drop rq->lock before this, otherwise the assertion in
>> >> @@ -659,6 +665,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>> >> * XXX figure out if select_task_rq_dl() deals with offline cpus.
>> >> */
>> >> if (unlikely(!rq->online)) {
>> >> + replenish_dl_entity(dl_se, dl_se);
>> >> lockdep_unpin_lock(&rq->lock, rf.cookie);
>> >> rq = dl_task_offline_migration(rq, p);
>> >
>> > So I don't like this, even if it magically works. With this we end up
>> > calling dl_task_offline_migration() -> deactivate_task() while the task
>> > isn't on the runqueue at all.
>>
>> So how about v1, it also works :), https://lkml.org/lkml/2016/8/10/898
>
> Does something like so work? Notice what we have preemption disabled (by
> virtue of having pi_lock and rq->lock taken) and thus cannot hotplug.
> Therefore if we notice a rq not being online, it must stay that way,
> equally any online rq must also stay that way.
>
> This means we can fold the two online tests you had and simply do the rq
> switch beforehand.

Thanks for the proposal Peterz! It works and I just sent out v4. :)

Regards,
Wanpeng Li

>
> Completely untested...
>
> ---
> kernel/sched/deadline.c | 46 ++++++++++++++++++----------------------------
> 1 file changed, 18 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index d091f4a95416..bcade08772a8 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -243,10 +243,8 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
> static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
> {
> struct rq *later_rq = NULL;
> - bool fallback = false;
>
> later_rq = find_lock_later_rq(p, rq);
> -
> if (!later_rq) {
> int cpu;
>
> @@ -254,7 +252,6 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
> * If we cannot preempt any rq, fall back to pick any
> * online cpu.
> */
> - fallback = true;
> cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
> if (cpu >= nr_cpu_ids) {
> /*
> @@ -274,16 +271,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
> double_lock_balance(rq, later_rq);
> }
>
> - /*
> - * By now the task is replenished and enqueued; migrate it.
> - */
> - deactivate_task(rq, p, 0);
> set_task_cpu(p, later_rq->cpu);
> - activate_task(later_rq, p, 0);
> -
> - if (!fallback)
> - resched_curr(later_rq);
> -
> double_unlock_balance(later_rq, rq);
>
> return later_rq;
> @@ -641,29 +629,31 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
> goto unlock;
> }
>
> - enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
> - if (dl_task(rq->curr))
> - check_preempt_curr_dl(rq, p, 0);
> - else
> - resched_curr(rq);
> -
> #ifdef CONFIG_SMP
> - /*
> - * Perform balancing operations here; after the replenishments. We
> - * cannot drop rq->lock before this, otherwise the assertion in
> - * start_dl_timer() about not missing updates is not true.
> - *
> - * If we find that the rq the task was on is no longer available, we
> - * need to select a new rq.
> - *
> - * XXX figure out if select_task_rq_dl() deals with offline cpus.
> - */
> if (unlikely(!rq->online)) {
> + /*
> + * If the runqueue is no longer available, migrate the
> + * task elsewhere. This necessarily changes rq.
> + */
> lockdep_unpin_lock(&rq->lock, rf.cookie);
> rq = dl_task_offline_migration(rq, p);
> rf.cookie = lockdep_pin_lock(&rq->lock);
> +
> + /*
> + * Now that the task has been migrated to the new RQ and we
> + * have that locked, proceed as normal and enqueue the task
> + * there.
> + */
> }
> +#endif
>
> + enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
> + if (dl_task(rq->curr))
> + check_preempt_curr_dl(rq, p, 0);
> + else
> + resched_curr(rq);
> +
> +#ifdef CONFIG_SMP
> /*
> * Queueing this task back might have overloaded rq, check if we need
> * to kick someone away.



--
Regards,
Wanpeng Li