Re: [PATCH 1/3] sched/rt: cpupri_find: implement fallback mechanism for !fit case

From: Qais Yousef
Date: Mon Feb 17 2020 - 18:45:57 EST


On 02/17/20 20:09, Dietmar Eggemann wrote:
> On 14/02/2020 17:39, Qais Yousef wrote:
>
> [...]
>
> > /**
> > * cpupri_find - find the best (lowest-pri) CPU in the system
> > * @cp: The cpupri context
> > @@ -62,80 +115,72 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
> > struct cpumask *lowest_mask,
> > bool (*fitness_fn)(struct task_struct *p, int cpu))
> > {
> > - int idx = 0;
> > int task_pri = convert_prio(p->prio);
> > + int best_unfit_idx = -1;
> > + int idx = 0, cpu;
> >
> > BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
> >
> > for (idx = 0; idx < task_pri; idx++) {
> > - struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
> > - int skip = 0;
> >
> > - if (!atomic_read(&(vec)->count))
> > - skip = 1;
> > - /*
> > - * When looking at the vector, we need to read the counter,
> > - * do a memory barrier, then read the mask.
> > - *
> > - * Note: This is still all racey, but we can deal with it.
> > - * Ideally, we only want to look at masks that are set.
> > - *
> > - * If a mask is not set, then the only thing wrong is that we
> > - * did a little more work than necessary.
> > - *
> > - * If we read a zero count but the mask is set, because of the
> > - * memory barriers, that can only happen when the highest prio
> > - * task for a run queue has left the run queue, in which case,
> > - * it will be followed by a pull. If the task we are processing
> > - * fails to find a proper place to go, that pull request will
> > - * pull this task if the run queue is running at a lower
> > - * priority.
> > - */
> > - smp_rmb();
> > -
> > - /* Need to do the rmb for every iteration */
> > - if (skip)
> > - continue;
> > -
> > - if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
> > + if (!__cpupri_find(cp, p, lowest_mask, idx))
> > continue;
> >
> > - if (lowest_mask) {
> > - int cpu;
>
> Shouldn't we add an extra condition here?
>
> + if (!static_branch_unlikely(&sched_asym_cpucapacity))
> + return 1;
> +
>
> Otherwise non-heterogeneous systems have to go through this
> for_each_cpu(cpu, lowest_mask) further below for no good reason.

Hmm, below is the best solution I can think of at the moment. Does it work for you?

It's independent of what this patch tries to fix, so I'll add it as a separate
patch to the series in the next update.

Thanks

--
Qais Yousef

---

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5ea235f2cfe8..5f2eaf3affde 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -14,6 +14,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

+typedef bool (*fitness_fn_t)(struct task_struct *p, int cpu);
+
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
struct rt_bandwidth *rt_b =
@@ -1708,6 +1710,7 @@ static int find_lowest_rq(struct task_struct *task)
struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ fitness_fn_t fitness_fn;

/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
@@ -1716,8 +1719,17 @@ static int find_lowest_rq(struct task_struct *task)
if (task->nr_cpus_allowed == 1)
return -1; /* No other targets possible */

+ /*
+ * Help cpupri_find avoid the cost of looking for a fitting CPU when
+ * not really needed.
+ */
+ if (static_branch_unlikely(&sched_asym_cpucapacity))
+ fitness_fn = rt_task_fits_capacity;
+ else
+ fitness_fn = NULL;
+
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask,
- rt_task_fits_capacity))
+ fitness_fn))
return -1; /* No targets found */

/*


>
> > + if (!lowest_mask || !fitness_fn)
> > + return 1;
> >
> > - cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
> > + /* Ensure the capacity of the CPUs fit the task */
> > + for_each_cpu(cpu, lowest_mask) {
> > + if (!fitness_fn(p, cpu))
> > + cpumask_clear_cpu(cpu, lowest_mask);
> > + }
>
> [...]