Re: [PATCH v5 3/4] sched/deadline: Add support for SD_PREFER_SIBLING on find_later_rq()

From: Peter Zijlstra
Date: Thu Aug 03 2017 - 08:03:55 EST



I picked up the first 2 with edits to the Changelog as suggested
by Juri.

On Tue, May 23, 2017 at 11:00:58AM +0900, Byungchul Park wrote:

> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 0223694..ada264c 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1325,6 +1325,7 @@ static int find_later_rq(struct task_struct *task)
>  	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
>  	int this_cpu = smp_processor_id();
>  	int cpu = task_cpu(task);
> +	int fallback_cpu = -1;
>
>  	/* Make sure the mask is initialized first */
>  	if (unlikely(!later_mask))
> @@ -1385,6 +1386,15 @@ static int find_later_rq(struct task_struct *task)
>  			 * already under consideration through later_mask.
>  			 */
>  			if (best_cpu < nr_cpu_ids) {
> +				/*
> +				 * If the current domain is flagged
> +				 * SD_PREFER_SIBLING, keep this cpu as a
> +				 * fallback and give the sibling domains a chance.
> +				 */
> +				if (sd->flags & SD_PREFER_SIBLING) {
> +					fallback_cpu = best_cpu;
> +					continue;
> +				}
>  				rcu_read_unlock();
>  				return best_cpu;
>  			}
> @@ -1393,6 +1403,13 @@ static int find_later_rq(struct task_struct *task)
>  	rcu_read_unlock();
>
>  	/*
> +	 * If fallback_cpu is valid, all our guesses failed *except* for
> +	 * the SD_PREFER_SIBLING domain. Return the fallback cpu.
> +	 */
> +	if (fallback_cpu != -1)
> +		return fallback_cpu;
> +
> +	/*
>  	 * At this point, all our guesses failed, we just return
>  	 * 'something', and let the caller sort the things out.
>  	 */


This one I'm not sure on.. at the very least we should exclude all of
the prefer sibling domain when we search the next domain, and if there
are multiple prefer sibling levels, we should only keep the first
fallback_cpu -- that first one is by construction the closest candidate,
so there is no point in overriding it with a possible CPU further away.

I implemented that below -- although the find_cpu() function is really
rather horrible.
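
FWIW it could perhaps be squashed a little with for_each_cpu_and(),
folding the mask & sd intersection into the iterator. Untested sketch,
same semantics as the find_cpu() in the patch below:

static int find_cpu(const struct cpumask *mask,
		    const struct sched_domain *sd,
		    const struct sched_domain *prefer)
{
	const struct cpumask *ps = prefer ? sched_domain_span(prefer) : NULL;
	int cpu;

	for_each_cpu_and(cpu, mask, sched_domain_span(sd)) {
		/* Skip cpus in the prefer-sibling domain we already tried. */
		if (ps && cpumask_test_cpu(cpu, ps))
			continue;
		return cpu;
	}

	/* Nothing found; report it the same way cpumask_next() would. */
	return nr_cpu_ids;
}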

But still this isn't quite right, because when we consider this for SMT
(as was the intent here) we'll happily occupy an already busy sibling
core over finding an entirely idle one.

Now, the problem is that actually doing the right thing quickly ends up
very expensive; we'd have to scan the entire cache domain at least once.
So maybe this is good enough.. no idea :/
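
For reference, "doing the right thing" would look something like the
below: one pass over the cache domain hunting for a core whose siblings
are all free in later_mask. Entirely untested sketch; find_idle_core()
is a made-up name, it assumes CONFIG_SCHED_SMT for cpu_smt_mask(), and
it relies on being called under the rcu_read_lock() that find_later_rq()
already holds:

static int find_idle_core(struct task_struct *task,
			  const struct cpumask *later_mask)
{
	struct sched_domain *sd_cache;
	int cpu, sibling;

	sd_cache = rcu_dereference(per_cpu(sd_llc, task_cpu(task)));
	if (!sd_cache)
		return -1;

	for_each_cpu(cpu, sched_domain_span(sd_cache)) {
		bool core_free = true;

		/* Visit each core only once, through its first sibling. */
		if (cpu != cpumask_first(cpu_smt_mask(cpu)))
			continue;

		/* A core is free when all its siblings are in later_mask. */
		for_each_cpu(sibling, cpu_smt_mask(cpu)) {
			if (!cpumask_test_cpu(sibling, later_mask)) {
				core_free = false;
				break;
			}
		}
		if (core_free)
			return cpu;
	}

	return -1;
}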


---
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1793,12 +1793,35 @@ static struct task_struct *pick_earliest

 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

+/*
+ * Find the first cpu in: mask & sd & ~prefer
+ */
+static int find_cpu(const struct cpumask *mask,
+		    const struct sched_domain *sd,
+		    const struct sched_domain *prefer)
+{
+	const struct cpumask *sds = sched_domain_span(sd);
+	const struct cpumask *ps = prefer ? sched_domain_span(prefer) : NULL;
+	int cpu = -1;
+
+	while ((cpu = cpumask_next(cpu, mask)) < nr_cpu_ids) {
+		if (!cpumask_test_cpu(cpu, sds))
+			continue;
+		if (ps && cpumask_test_cpu(cpu, ps))
+			continue;
+		break;
+	}
+
+	return cpu;
+}
+
 static int find_later_rq(struct task_struct *task)
 {
-	struct sched_domain *sd;
+	struct sched_domain *sd, *prefer = NULL;
 	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
+	int fallback_cpu = -1;
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!later_mask))
@@ -1850,8 +1873,7 @@ static int find_later_rq(struct task_str
 				return this_cpu;
 			}
 
-			best_cpu = cpumask_first_and(later_mask,
-						     sched_domain_span(sd));
+			best_cpu = find_cpu(later_mask, sd, prefer);
 			/*
 			 * Last chance: if a cpu being in both later_mask
 			 * and current sd span is valid, that becomes our
@@ -1859,6 +1881,17 @@ static int find_later_rq(struct task_str
 			 * already under consideration through later_mask.
 			 */
 			if (best_cpu < nr_cpu_ids) {
+				/*
+				 * If the current domain is flagged
+				 * SD_PREFER_SIBLING, keep this cpu as a
+				 * fallback and give the sibling domains a chance.
+				 */
+				if (sd->flags & SD_PREFER_SIBLING) {
+					prefer = sd;
+					if (fallback_cpu == -1)
+						fallback_cpu = best_cpu;
+					continue;
+				}
 				rcu_read_unlock();
 				return best_cpu;
 			}
@@ -1867,6 +1900,13 @@ static int find_later_rq(struct task_str
 	rcu_read_unlock();
 
 	/*
+	 * If fallback_cpu is valid, all our guesses failed *except* for
+	 * the SD_PREFER_SIBLING domain. Return the fallback cpu.
+	 */
+	if (fallback_cpu != -1)
+		return fallback_cpu;
+
+	/*
 	 * At this point, all our guesses failed, we just return
 	 * 'something', and let the caller sort the things out.
 	 */