Re: Random panic in load_balance() with 3.16-rc

From: Peter Zijlstra
Date: Wed Jul 23 2014 - 14:23:18 EST


On Wed, Jul 23, 2014 at 09:54:23AM -0700, Linus Torvalds wrote:
> Alternatively, keep it a "cpumask_var_t", but then you need to use
> this_cpu_ptr() to get the address of it, and use
> "alloc_cpumask_var()" to allocate the area for the OFFSTACK case.
>
> TOTALLY UNTESTED AND PROBABLY PURE CRAP PATCH ATTACHED.
>
> WARNING! WARNING! WARNING! This is just looking at the code, not
> really knowing it, and saying "that looks really really wrong". Maybe
> I'm full of shit.
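
For reference, that alternative would look roughly like so (untested,
error handling omitted; alloc_cpumask_var_node() is a no-op returning
true when CONFIG_CPUMASK_OFFSTACK is not set):

	DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);

	/* init: only actually allocates for CONFIG_CPUMASK_OFFSTACK */
	for_each_possible_cpu(i)
		alloc_cpumask_var_node(&per_cpu(load_balance_mask, i),
				       GFP_NOWAIT, cpu_to_node(i));

	/* use: yields a struct cpumask * in both configs */
	struct cpumask *cpus = *this_cpu_ptr(&load_balance_mask);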

If we're doing that (going to a bare struct cpumask *), then we also
need to unconditionally allocate the memory that pointer points to.
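
Recall what cpumask_var_t expands to (include/linux/cpumask.h):

	#ifdef CONFIG_CPUMASK_OFFSTACK
	typedef struct cpumask *cpumask_var_t;
	#else
	typedef struct cpumask cpumask_var_t[1];
	#endif

So a per-cpu cpumask_var_t is real per-cpu storage in the on-stack
case but only an (unallocated) per-cpu pointer in the off-stack case,
while a bare struct cpumask * never carries its own storage; hence
every config has to allocate.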

The below is something that seems consistent and uses a plain struct
cpumask * as you suggest.

Still wondering how the heck any of that ever worked without
generating more crashes.

---
 kernel/sched/core.c | 17 +++++++----------
 kernel/sched/fair.c |  4 ++--
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7bc599dc4aa4..976d520587a8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6867,7 +6867,7 @@ struct task_group root_task_group;
 LIST_HEAD(task_groups);
 #endif
 
-DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
+DECLARE_PER_CPU(struct cpumask *, load_balance_mask);
 
 void __init sched_init(void)
 {
@@ -6880,9 +6880,6 @@ void __init sched_init(void)
 #ifdef CONFIG_RT_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
 #endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	alloc_size += num_possible_cpus() * cpumask_size();
-#endif
 	if (alloc_size) {
 		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
 
@@ -6902,12 +6899,12 @@ void __init sched_init(void)
 		ptr += nr_cpu_ids * sizeof(void **);
 
 #endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_CPUMASK_OFFSTACK
-		for_each_possible_cpu(i) {
-			per_cpu(load_balance_mask, i) = (void *)ptr;
-			ptr += cpumask_size();
-		}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
+	}
+
+	for_each_possible_cpu(i) {
+		per_cpu(load_balance_mask, i) = kzalloc_node(cpumask_size(),
+							     GFP_NOWAIT,
+							     cpu_to_node(i));
 	}
 
 	init_rt_bandwidth(&def_rt_bandwidth,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 45943b2fa82b..e4d939dc1084 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6469,7 +6469,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 #define MAX_PINNED_INTERVAL	512
 
 /* Working cpumask for load_balance and load_balance_newidle. */
-DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
+DEFINE_PER_CPU(struct cpumask *, load_balance_mask);
 
 static int need_active_balance(struct lb_env *env)
 {
@@ -6538,7 +6538,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct sched_group *group;
 	struct rq *busiest;
 	unsigned long flags;
-	struct cpumask *cpus = __get_cpu_var(load_balance_mask);
+	struct cpumask *cpus = this_cpu_read(load_balance_mask);
 
 	struct lb_env env = {
 		.sd		= sd,