[RFC PATCH] kernel: sched: Provide a pointer to the valid CPU mask

From: Sebastian Andrzej Siewior
Date: Tue Apr 04 2017 - 14:42:24 EST


In commit 4b53a3412d66 ("sched/core: Remove the tsk_nr_cpus_allowed()
wrapper") the tsk_nr_cpus_allowed() wrapper was removed. There was not
much difference in !RT but in RT we used this to implement
migrate_disable(). Within a migrate_disable() section the CPU mask is
restricted to a single CPU while the "normal" CPU mask remains untouched.

As an alternative implementation Ingo suggested to use
struct task_struct {
const cpumask_t *cpus_ptr;
cpumask_t cpus_mask;
};
with
t->cpus_ptr = &t->cpus_mask;

In -RT we then can switch the cpus_ptr to
t->cpus_ptr = cpumask_of(task_cpu(t));

in a migration disabled region. The rules are simple:
- Code that 'uses' ->cpus_allowed would use the pointer.
- Code that 'modifies' ->cpus_allowed would use the direct mask.

While converting the existing users I tried to stick with the rules
above, however... well, mostly. CPUFREQ tries to temporarily switch the CPU
mask to do something on a certain CPU and then switches the mask back to
its original value. So in theory `cpus_ptr' could or should be used.
However if this is invoked in a migration disabled region (which is not
the case because it would require something like preempt_disable() and
set_cpus_allowed_ptr() might sleep so it can't be) then the "restore"
part would restore the wrong mask. So it only looks strange and I go for
the pointer...

Some drivers copy the cpumask without cpumask_copy() and others use
cpumask_copy() but without alloc_cpumask_var(). I did not fix those as
part of this, but could do so as a follow-up...

So is this the way we want it?
Is the usage of `cpus_ptr' vs `cpus_mask' for the set + restore part
(see cpufreq users) what we want? At some point it looks like they
should use a different interface for what they are doing. I am not sure why
switching to a certain CPU is important but maybe it could be done via a
workqueue from the CPUFREQ core (so we have a comment describing why we are
doing this and a get_online_cpus() to ensure that the CPU does not go
offline too early).

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
arch/ia64/kernel/mca.c | 2 +-
arch/ia64/kernel/salinfo.c | 2 +-
arch/ia64/kernel/topology.c | 2 +-
arch/ia64/sn/kernel/sn2/sn_hwperf.c | 2 +-
arch/mips/include/asm/switch_to.h | 4 +--
arch/mips/kernel/mips-mt-fpaff.c | 2 +-
arch/mips/kernel/traps.c | 6 ++---
arch/powerpc/kernel/smp.c | 2 +-
arch/powerpc/platforms/cell/spufs/sched.c | 2 +-
arch/sparc/kernel/sysfs.c | 2 +-
arch/tile/include/asm/setup.h | 2 +-
arch/tile/kernel/hardwall.c | 10 +++----
drivers/acpi/processor_throttling.c | 2 +-
drivers/cpufreq/ia64-acpi-cpufreq.c | 4 +--
drivers/cpufreq/sh-cpufreq.c | 2 +-
drivers/cpufreq/sparc-us2e-cpufreq.c | 4 +--
drivers/cpufreq/sparc-us3-cpufreq.c | 4 +--
drivers/crypto/n2_core.c | 2 +-
drivers/infiniband/hw/hfi1/affinity.c | 6 ++---
drivers/infiniband/hw/hfi1/sdma.c | 3 +--
drivers/infiniband/hw/qib/qib_file_ops.c | 7 +++--
fs/proc/array.c | 4 +--
include/linux/init_task.h | 3 ++-
include/linux/sched.h | 5 ++--
kernel/cgroup/cpuset.c | 2 +-
kernel/sched/core.c | 42 +++++++++++++++---------------
kernel/sched/cpudeadline.c | 4 +--
kernel/sched/cpupri.c | 4 +--
kernel/sched/deadline.c | 6 ++---
kernel/sched/fair.c | 28 ++++++++++----------
kernel/sched/rt.c | 4 +--
kernel/trace/trace_hwlat.c | 2 +-
lib/smp_processor_id.c | 2 +-
samples/trace_events/trace-events-sample.c | 2 +-
34 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 79c7c46d7dc1..f5f116e1a504 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1824,7 +1824,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->cpu = cpu;
p->stack = ti;
p->state = TASK_UNINTERRUPTIBLE;
- cpumask_set_cpu(cpu, &p->cpus_allowed);
+ cpumask_set_cpu(cpu, &p->cpus_mask);
INIT_LIST_HEAD(&p->tasks);
p->parent = p->real_parent = p->group_leader = p;
INIT_LIST_HEAD(&p->children);
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index d194d5c83d32..382c1cdc28f5 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -383,7 +383,7 @@ salinfo_log_release(struct inode *inode, struct file *file)
static void
call_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
- cpumask_t save_cpus_allowed = current->cpus_allowed;
+ cpumask_t save_cpus_allowed = current->cpus_mask;
set_cpus_allowed_ptr(current, cpumask_of(cpu));
(*fn)(arg);
set_cpus_allowed_ptr(current, &save_cpus_allowed);
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 1a68f012a6dc..62684276abeb 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -360,7 +360,7 @@ static int cache_add_dev(unsigned int cpu)
if (all_cpu_cache_info[cpu].kobj.parent)
return 0;

- oldmask = current->cpus_allowed;
+ oldmask = current->cpus_mask;
retval = set_cpus_allowed_ptr(current, cpumask_of(cpu));
if (unlikely(retval))
return retval;
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 52704f199dd6..3c9a3c9b0ee7 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -632,7 +632,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
}
else {
/* migrate the task before calling SAL */
- save_allowed = current->cpus_allowed;
+ save_allowed = current->cpus_mask;
set_cpus_allowed_ptr(current, cpumask_of(cpu));
sn_hwperf_call_sal(op_info);
set_cpus_allowed_ptr(current, &save_allowed);
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index e610473d61b8..1428b4febbc9 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
* inline to try to keep the overhead down. If we have been forced to run on
* a "CPU" with an FPU because of a previous high level of FP computation,
* but did not actually use the FPU during the most recent time-slice (CU1
- * isn't set), we undo the restriction on cpus_allowed.
+ * isn't set), we undo the restriction on cpus_mask.
*
* We're not calling set_cpus_allowed() here, because we have no need to
* force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
} \
next->thread.emulated_fp = 0; \
} while(0)
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 8cab633e0e5a..f64555ea9068 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -176,7 +176,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
if (retval)
goto out_unlock;

- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
cpumask_and(&mask, &allowed, cpu_active_mask);

out_unlock:
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c7d17cfb32f6..d77e79e03bd0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1191,12 +1191,12 @@ static void mt_ase_fp_affinity(void)
* restricted the allowed set to exclude any CPUs with FPUs,
* we'll skip the procedure.
*/
- if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
+ if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
cpumask_t tmask;

current->thread.user_cpus_allowed
- = current->cpus_allowed;
- cpumask_and(&tmask, &current->cpus_allowed,
+ = current->cpus_mask;
+ cpumask_and(&tmask, &current->cpus_mask,
&mt_fpu_cpumask);
set_cpus_allowed_ptr(current, &tmask);
set_thread_flag(TIF_FPUBOUND);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46f89e66a273..f7d2023023c5 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -796,7 +796,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
* se we pin us down to CPU 0 for a short while
*/
alloc_cpumask_var(&old_mask, GFP_NOWAIT);
- cpumask_copy(old_mask, &current->cpus_allowed);
+ cpumask_copy(old_mask, &current->cpus_mask);
set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));

if (smp_ops && smp_ops->setup_cpu)
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 1fbb5da17dd2..ca86366d5424 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);

/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index d63fc613e7a9..c3f9a4c9abf0 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -106,7 +106,7 @@ static unsigned long run_on_cpu(unsigned long cpu,
cpumask_t old_affinity;
unsigned long ret;

- cpumask_copy(&old_affinity, &current->cpus_allowed);
+ cpumask_copy(&old_affinity, &current->cpus_mask);
/* should return -EINVAL to userspace */
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
return 0;
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index 2a0347af0702..670fa2f4cfc3 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -49,7 +49,7 @@ int hardwall_ipi_valid(int cpu);

/* Hook hardwall code into changes in affinity. */
#define arch_set_cpus_allowed(p, new_mask) do { \
- if (!cpumask_equal(&p->cpus_allowed, new_mask)) \
+ if (!cpumask_equal(p->cpus_ptr, new_mask)) \
hardwall_deactivate_all(p); \
} while (0)
#endif
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 2fd1694ac1d0..8cec2a88049d 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -590,12 +590,12 @@ static int hardwall_activate(struct hardwall_info *info)
* Get our affinity; if we're not bound to this tile uniquely,
* we can't access the network registers.
*/
- if (cpumask_weight(&p->cpus_allowed) != 1)
+ if (p->nr_cpus_allowed != 1)
return -EPERM;

/* Make sure we are bound to a cpu assigned to this resource. */
cpu = smp_processor_id();
- BUG_ON(cpumask_first(&p->cpus_allowed) != cpu);
+ BUG_ON(cpumask_first(p->cpus_ptr) != cpu);
if (!cpumask_test_cpu(cpu, &info->cpumask))
return -EINVAL;

@@ -621,17 +621,17 @@ static int hardwall_activate(struct hardwall_info *info)
* Deactivate a task's hardwall. Must hold lock for hardwall_type.
* This method may be called from exit_thread(), so we don't want to
* rely on too many fields of struct task_struct still being valid.
- * We assume the cpus_allowed, pid, and comm fields are still valid.
+ * We assume the nr_cpus_allowed, pid, and comm fields are still valid.
*/
static void _hardwall_deactivate(struct hardwall_type *hwt,
struct task_struct *task)
{
struct thread_struct *ts = &task->thread;

- if (cpumask_weight(&task->cpus_allowed) != 1) {
+ if (task->nr_cpus_allowed != 1) {
pr_err("pid %d (%s) releasing %s hardwall with an affinity mask containing %d cpus!\n",
task->pid, task->comm, hwt->name,
- cpumask_weight(&task->cpus_allowed));
+ task->nr_cpus_allowed);
BUG();
}

diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index a12f96cc93ff..a689b84721cc 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -918,7 +918,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
/*
* Migrate task to the cpu pointed by pr.
*/
- cpumask_copy(saved_mask, &current->cpus_allowed);
+ cpumask_copy(saved_mask, &current->cpus_mask);
/* FIXME: use work_on_cpu() */
if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) {
/* Can't migrate to the target pr->id CPU. Exit */
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index e28a31a40829..deb06ffbafb9 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -110,7 +110,7 @@ processor_get_freq (

pr_debug("processor_get_freq\n");

- saved_mask = current->cpus_allowed;
+ saved_mask = current->cpus_mask;
set_cpus_allowed_ptr(current, cpumask_of(cpu));
if (smp_processor_id() != cpu)
goto migrate_end;
@@ -146,7 +146,7 @@ processor_set_freq (

pr_debug("processor_set_freq\n");

- saved_mask = current->cpus_allowed;
+ saved_mask = current->cpus_mask;
set_cpus_allowed_ptr(current, cpumask_of(policy->cpu));
if (smp_processor_id() != policy->cpu) {
retval = -EAGAIN;
diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 86628e22b2a3..3a9432f9fb64 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -49,7 +49,7 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy,
struct device *dev;
long freq;

- cpus_allowed = current->cpus_allowed;
+ cpus_allowed = current->cpus_mask;
set_cpus_allowed_ptr(current, cpumask_of(cpu));

BUG_ON(smp_processor_id() != cpu);
diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c
index 35ddb6da93aa..db16a3c6386d 100644
--- a/drivers/cpufreq/sparc-us2e-cpufreq.c
+++ b/drivers/cpufreq/sparc-us2e-cpufreq.c
@@ -234,7 +234,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
cpumask_t cpus_allowed;
unsigned long clock_tick, estar;

- cpumask_copy(&cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&cpus_allowed, &current->cpus_mask);
set_cpus_allowed_ptr(current, cpumask_of(cpu));

clock_tick = sparc64_get_clock_tick(cpu) / 1000;
@@ -252,7 +252,7 @@ static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index)
unsigned long clock_tick, divisor, old_divisor, estar;
cpumask_t cpus_allowed;

- cpumask_copy(&cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&cpus_allowed, &current->cpus_mask);
set_cpus_allowed_ptr(current, cpumask_of(cpu));

new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c
index a8d86a449ca1..603258602d03 100644
--- a/drivers/cpufreq/sparc-us3-cpufreq.c
+++ b/drivers/cpufreq/sparc-us3-cpufreq.c
@@ -82,7 +82,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
unsigned long reg;
unsigned int ret;

- cpumask_copy(&cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&cpus_allowed, &current->cpus_mask);
set_cpus_allowed_ptr(current, cpumask_of(cpu));

reg = read_safari_cfg();
@@ -99,7 +99,7 @@ static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index)
unsigned long new_bits, new_freq, reg;
cpumask_t cpus_allowed;

- cpumask_copy(&cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&cpus_allowed, &current->cpus_mask);
set_cpus_allowed_ptr(current, cpumask_of(cpu));

new_freq = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index c5aac25a5738..b853951f8ef0 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1642,7 +1642,7 @@ static int spu_queue_register(struct spu_queue *p, unsigned long q_type)
if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
return -ENOMEM;

- cpumask_copy(old_allowed, &current->cpus_allowed);
+ cpumask_copy(old_allowed, &current->cpus_mask);

set_cpus_allowed_ptr(current, &p->sharing);

diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index e2cd2cd3b28a..822873226a19 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -576,7 +576,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = &current->cpus_allowed;
+ *proc_mask = current->cpus_ptr;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;

@@ -584,7 +584,7 @@ int hfi1_get_proc_affinity(int node)
* check whether process/context affinity has already
* been set
*/
- if (cpumask_weight(proc_mask) == 1) {
+ if (current->nr_cpus_allowed == 1) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -595,7 +595,7 @@ int hfi1_get_proc_affinity(int node)
cpu = cpumask_first(proc_mask);
cpumask_set_cpu(cpu, &set->used);
goto done;
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 5cde1ecda0fe..d8555c2d232c 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -856,14 +856,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;

/*
* To ensure that always the same sdma engine(s) will be
* selected make sure the process is pinned to this CPU only.
*/
- if (cpumask_weight(current_mask) != 1)
+ if (current->nr_cpus_allowed != 1)
goto out;

cpu_id = smp_processor_id();
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 9396c1807cc3..a42fcd4735cb 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1163,7 +1163,7 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
{
struct qib_filedata *fd = fp->private_data;
- const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const unsigned int weight = current->nr_cpus_allowed;
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
int local_cpu;

@@ -1644,9 +1644,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
else {
int unit;
- const unsigned int cpu = cpumask_first(&current->cpus_allowed);
- const unsigned int weight =
- cpumask_weight(&current->cpus_allowed);
+ const unsigned int cpu = cpumask_first(current->cpus_ptr);
+ const unsigned int weight = current->nr_cpus_allowed;

if (weight == 1 && !test_bit(cpu, qib_cpulist))
if (!find_hca(cpu, &unit) && unit >= 0)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c355574aa0..5fe112bce201 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -364,9 +364,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t%*pb\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
}

int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 91d9049f0039..d17c35006048 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -225,7 +225,8 @@ extern struct cred init_cred;
.static_prio = MAX_PRIO-20, \
.normal_prio = MAX_PRIO-20, \
.policy = SCHED_NORMAL, \
- .cpus_allowed = CPU_MASK_ALL, \
+ .cpus_ptr = &tsk.cpus_mask, \
+ .cpus_mask = CPU_MASK_ALL, \
.nr_cpus_allowed= NR_CPUS, \
.mm = NULL, \
.active_mm = &init_mm, \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d67eee84fd43..01b22549404b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -535,7 +535,8 @@ struct task_struct {

unsigned int policy;
int nr_cpus_allowed;
- cpumask_t cpus_allowed;
+ const cpumask_t *cpus_ptr;
+ cpumask_t cpus_mask;

#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -1218,7 +1219,7 @@ extern struct pid *cad_pid;
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0f41292be0fb..5bb6d48cfece 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2092,7 +2092,7 @@ static void cpuset_fork(struct task_struct *task)
if (task_css_is_root(task, cpuset_cgrp_id))
return;

- set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
}

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b31fc05a0f1..96481b3ea746 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -986,7 +986,7 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_
return rq;

/* Affinity changed (again). */
- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return rq;

rq = move_queued_task(rq, p, dest_cpu);
@@ -1012,7 +1012,7 @@ static int migration_cpu_stop(void *data)
local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
- * __migrate_task() such that we will not miss enforcing cpus_allowed
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
sched_ttwu_pending();
@@ -1043,7 +1043,7 @@ static int migration_cpu_stop(void *data)
*/
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
- cpumask_copy(&p->cpus_allowed, new_mask);
+ cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}

@@ -1113,7 +1113,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}

- if (cpumask_equal(&p->cpus_allowed, new_mask))
+ if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;

if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -1264,10 +1264,10 @@ static int migrate_swap_stop(void *data)
if (task_cpu(arg->src_task) != arg->src_cpu)
goto unlock;

- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
goto unlock;

- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
goto unlock;

__migrate_swap_task(arg->src_task, arg->dst_cpu);
@@ -1308,10 +1308,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
goto out;

- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
goto out;

- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
goto out;

trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
@@ -1455,7 +1455,7 @@ void kick_process(struct task_struct *p)
EXPORT_SYMBOL_GPL(kick_process);

/*
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
*
* A few notes on cpu_active vs cpu_online:
*
@@ -1495,14 +1495,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_active(dest_cpu))
continue;
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return dest_cpu;
}
}

for (;;) {
/* Any allowed, online CPU? */
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
continue;
if (!cpu_online(dest_cpu))
@@ -1547,7 +1547,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
}

/*
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
*/
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
@@ -1557,11 +1557,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
else
- cpu = cpumask_any(&p->cpus_allowed);
+ cpu = cpumask_any(p->cpus_ptr);

/*
* In order not to call set_task_cpu() on a blocking task we need
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
* CPU.
*
* Since this is common to all placement strategies, this lives here.
@@ -1569,7 +1569,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
* [ this allows ->select_task() to simply return task_cpu(p) and
* not worry about this generic constraint ]
*/
- if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+ if (unlikely(!cpumask_test_cpu(cpu, p->cpus_ptr) ||
!cpu_online(cpu)))
cpu = select_fallback_rq(task_cpu(p), p);

@@ -2543,7 +2543,7 @@ void wake_up_new_task(struct task_struct *p)
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
- * - cpus_allowed can change in the fork path
+ * - cpus_ptr can change in the fork path
* - any previously selected CPU might disappear through hotplug
*
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
@@ -4273,7 +4273,7 @@ static int __sched_setscheduler(struct task_struct *p,
* the entire root_domain to become SCHED_DEADLINE. We
* will also fail if there's no bandwidth available.
*/
- if (!cpumask_subset(span, &p->cpus_allowed) ||
+ if (!cpumask_subset(span, p->cpus_ptr) ||
rq->rd->dl_bw.bw == 0) {
task_rq_unlock(rq, p, &rf);
return -EPERM;
@@ -4867,7 +4867,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
goto out_unlock;

raw_spin_lock_irqsave(&p->pi_lock, flags);
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);

out_unlock:
@@ -5427,7 +5427,7 @@ int task_can_attach(struct task_struct *p,
* allowed nodes is unnecessary. Thus, cpusets are not
* applicable for such threads. This prevents checking for
* success of set_cpus_allowed_ptr() on all attached tasks
- * before cpus_allowed may be changed.
+ * before cpus_mask may be changed.
*/
if (p->flags & PF_NO_SETAFFINITY) {
ret = -EINVAL;
@@ -5483,7 +5483,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (curr_cpu == target_cpu)
return 0;

- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;

/* TODO: This is not properly updating schedstats */
@@ -5623,7 +5623,7 @@ static void migrate_tasks(struct rq *dead_rq)
next->sched_class->put_prev_task(rq, next);

/*
- * Rules for changing task_struct::cpus_allowed are holding
+ * Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->lock, such that holding either
* stabilizes the mask.
*
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index fba235c7d026..165dd5078e85 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -128,10 +128,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
const struct sched_dl_entity *dl_se = &p->dl;

if (later_mask &&
- cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+ cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
best_cpu = cpumask_any(later_mask);
goto out;
- } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
+ } else if (cpumask_test_cpu(cpudl_maximum(cp), p->cpus_ptr) &&
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
best_cpu = cpudl_maximum(cp);
if (later_mask)
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7dc394..d84aae9b694c 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (skip)
continue;

- if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+ if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
continue;

if (lowest_mask) {
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);

/*
* We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a2ce59015642..ce9233cd1126 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -252,7 +252,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
* If we cannot preempt any rq, fall back to pick any
* online cpu.
*/
- cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+ cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
if (cpu >= nr_cpu_ids) {
/*
* Fail to find any suitable cpu.
@@ -1286,7 +1286,7 @@ static void set_curr_task_dl(struct rq *rq)
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_allowed))
+ cpumask_test_cpu(cpu, p->cpus_ptr))
return 1;
return 0;
}
@@ -1435,7 +1435,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
/* Retry if something changed. */
if (double_lock_balance(rq, later_rq)) {
if (unlikely(task_rq(task) != rq ||
- !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
+ !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
task_running(rq, task) ||
!dl_task(task) ||
!task_on_rq_queued(task))) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dea138964b91..e136330f861f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1553,7 +1553,7 @@ static void task_numa_compare(struct task_numa_env *env,
*/
if (cur) {
/* Skip this swap candidate if cannot move to the source cpu */
- if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
+ if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
goto unlock;

/*
@@ -1663,7 +1663,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,

for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
/* Skip this CPU if the source task cannot migrate */
- if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;

env->dst_cpu = cpu;
@@ -5460,7 +5460,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,

/* Skip over this group if it has no CPUs allowed */
if (!cpumask_intersects(sched_group_cpus(group),
- &p->cpus_allowed))
+ p->cpus_ptr))
continue;

local_group = cpumask_test_cpu(this_cpu,
@@ -5580,7 +5580,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
return cpumask_first(sched_group_cpus(group));

/* Traverse only the allowed CPUs */
- for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+ for_each_cpu_and(i, sched_group_cpus(group), p->cpus_ptr) {
if (idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
@@ -5719,7 +5719,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
if (!test_idle_cores(target, false))
return -1;

- cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+ cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);

for_each_cpu_wrap(core, cpus, target, wrap) {
bool idle = true;
@@ -5753,7 +5753,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
return -1;

for_each_cpu(cpu, cpu_smt_mask(target)) {
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
if (idle_cpu(cpu))
return cpu;
@@ -5805,7 +5805,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
time = local_clock();

for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
if (idle_cpu(cpu))
break;
@@ -5960,7 +5960,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
- && cpumask_test_cpu(cpu, &p->cpus_allowed);
+ && cpumask_test_cpu(cpu, p->cpus_ptr);
}

rcu_read_lock();
@@ -6693,14 +6693,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/*
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
- * 2) cannot be migrated to this CPU due to cpus_allowed, or
+ * 2) cannot be migrated to this CPU due to cpus_ptr, or
* 3) running (obviously), or
* 4) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;

- if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
+ if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
int cpu;

schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
@@ -6720,7 +6720,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)

/* Prevent to re-select dst_cpu via env's cpus */
for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
- if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
env->flags |= LBF_DST_PINNED;
env->new_dst_cpu = cpu;
break;
@@ -7254,7 +7254,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)

/*
* Group imbalance indicates (and tries to solve) the problem where balancing
- * groups is inadequate due to ->cpus_allowed constraints.
+ * groups is inadequate due to ->cpus_ptr constraints.
*
* Imagine a situation of two groups of 4 cpus each and 4 tasks each with a
* cpumask covering 1 cpu of the first group and 3 cpus of the second group.
@@ -7828,7 +7828,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
/*
* If the busiest group is imbalanced the below checks don't
* work because they assume all things are equal, which typically
- * isn't true due to cpus_allowed constraints and the like.
+ * isn't true due to cpus_ptr constraints and the like.
*/
if (busiest->group_type == group_imbalanced)
goto force_balance;
@@ -8213,7 +8213,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
* if the curr task on busiest cpu can't be
* moved to this_cpu
*/
- if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
+ if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
raw_spin_unlock_irqrestore(&busiest->lock,
flags);
env.flags |= LBF_ALL_PINNED;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9f3e40226dec..872eba9f2174 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1591,7 +1591,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_allowed))
+ cpumask_test_cpu(cpu, p->cpus_ptr))
return 1;
return 0;
}
@@ -1726,7 +1726,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
* Also make sure that it wasn't scheduled on its rq.
*/
if (unlikely(task_rq(task) != rq ||
- !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
+ !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
task_running(rq, task) ||
!rt_task(task) ||
!task_on_rq_queued(task))) {
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 21ea6ae77d93..0cdf70878312 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -279,7 +279,7 @@ static void move_to_next_cpu(void)
* of this thread, than stop migrating for the duration
* of the current test.
*/
- if (!cpumask_equal(current_mask, &current->cpus_allowed))
+ if (!cpumask_equal(current_mask, current->cpus_ptr))
goto disable;

get_online_cpus();
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 690d75b132fa..8779ffe90b76 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -22,7 +22,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
* Kernel threads bound to a single CPU can safely use
* smp_processor_id():
*/
- if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
+ if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
goto out;

/*
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index bc7fcf010a5b..9abbefb3617c 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -33,7 +33,7 @@ static void simple_thread_func(int cnt)

/* Silly tracepoints */
trace_foo_bar("hello", cnt, array, random_strings[len],
- &current->cpus_allowed);
+ current->cpus_ptr);

trace_foo_with_template_simple("HELLO", cnt);

--
2.11.0