Re: tip: demise of tsk_cpus_allowed() and tsk_nr_cpus_allowed()

From: Ingo Molnar
Date: Mon Feb 06 2017 - 17:23:54 EST



* Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:

> On Mon, Feb 06, 2017 at 01:29:28PM +0100, Ingo Molnar wrote:
> > > +/* Future-safe accessor for struct task_struct's cpus_allowed. */
> > > +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
> > > +{
> > > + if (__migrate_disabled(p))
> > > + return cpumask_of(task_cpu(p));
> > > +
> > > + return &p->cpus_allowed;
> > > +}
> > > +
> > > +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
> > > +{
> > > + if (__migrate_disabled(p))
> > > + return 1;
> > > + return p->nr_cpus_allowed;
> > > +}
> >
> > So ... I think the cleaner approach in -rt would be to introduce
> > ->cpus_allowed_saved, and when disabling/enabling migration then saving the
> > current mask there and changing ->cpus_allowed - and then restoring it when
> > re-enabling migration.
> >
> > This means ->cpus_allowed could be used by the scheduler directly, no wrappery
> > would be required, AFAICS.
> >
> > ( Some extra care would be required in places that change ->cpus_allowed because
> > they'd now have to be aware of ->cpus_allowed_saved. )
> >
> > Am I missing something?
>
> cpumasks are a pain, the above avoids allocating more of them.

Yeah, so this could then be done by pointerifying ->cpus_allowed - more robust
than the wrappery, because as I've noted in the changelog there's a large body of
upstream code that does not use the wrappers but uses ->cpus_allowed directly:

arch/ia64/kernel/mca.c: cpumask_set_cpu(cpu, &p->cpus_allowed);
arch/ia64/kernel/salinfo.c: cpumask_t save_cpus_allowed = current->cpus_allowed;
arch/ia64/kernel/topology.c: oldmask = current->cpus_allowed;
arch/ia64/sn/kernel/sn2/sn_hwperf.c: save_allowed = current->cpus_allowed;
arch/mips/include/asm/switch_to.h: * isn't set), we undo the restriction on cpus_allowed.
arch/mips/include/asm/switch_to.h: prev->cpus_allowed = prev->thread.user_cpus_allowed; \
arch/mips/kernel/mips-mt-fpaff.c: cpumask_var_t cpus_allowed, new_mask, effective_mask;
arch/mips/kernel/mips-mt-fpaff.c: if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
arch/mips/kernel/mips-mt-fpaff.c: cpuset_cpus_allowed(p, cpus_allowed);
arch/mips/kernel/mips-mt-fpaff.c: if (!cpumask_subset(effective_mask, cpus_allowed)) {
arch/mips/kernel/mips-mt-fpaff.c: * update. Just reset the cpus_allowed to the
arch/mips/kernel/mips-mt-fpaff.c: * cpuset's cpus_allowed
arch/mips/kernel/mips-mt-fpaff.c: cpumask_copy(new_mask, cpus_allowed);
arch/mips/kernel/mips-mt-fpaff.c: free_cpumask_var(cpus_allowed);
arch/mips/kernel/mips-mt-fpaff.c: cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
arch/mips/kernel/traps.c: if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
arch/mips/kernel/traps.c: = current->cpus_allowed;
arch/mips/kernel/traps.c: cpumask_and(&tmask, &current->cpus_allowed,
arch/powerpc/platforms/cell/spufs/sched.c: cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
arch/powerpc/platforms/cell/spufs/sched.c: if (cpumask_intersects(mask, &ctx->cpus_allowed))
arch/powerpc/platforms/cell/spufs/spufs.h: cpumask_t cpus_allowed;
arch/tile/include/asm/setup.h: if (!cpumask_equal(&p->cpus_allowed, new_mask)) \
arch/tile/kernel/hardwall.c: if (cpumask_weight(&p->cpus_allowed) != 1)
arch/tile/kernel/hardwall.c: BUG_ON(cpumask_first(&p->cpus_allowed) != cpu);
arch/tile/kernel/hardwall.c: * We assume the cpus_allowed, pid, and comm fields are still valid.
arch/tile/kernel/hardwall.c: if (cpumask_weight(&task->cpus_allowed) != 1) {
arch/tile/kernel/hardwall.c: cpumask_weight(&task->cpus_allowed));
drivers/acpi/processor_throttling.c: cpumask_copy(saved_mask, &current->cpus_allowed);
drivers/cpufreq/ia64-acpi-cpufreq.c: saved_mask = current->cpus_allowed;
drivers/cpufreq/ia64-acpi-cpufreq.c: saved_mask = current->cpus_allowed;
drivers/cpufreq/loongson2_cpufreq.c: cpumask_t cpus_allowed;
drivers/cpufreq/loongson2_cpufreq.c: cpus_allowed = current->cpus_allowed;
drivers/cpufreq/loongson2_cpufreq.c: set_cpus_allowed_ptr(current, &cpus_allowed);
drivers/cpufreq/sh-cpufreq.c: cpumask_t cpus_allowed;
drivers/cpufreq/sh-cpufreq.c: cpus_allowed = current->cpus_allowed;
drivers/cpufreq/sh-cpufreq.c: set_cpus_allowed_ptr(current, &cpus_allowed);
drivers/cpufreq/sparc-us2e-cpufreq.c: cpumask_t cpus_allowed;
drivers/cpufreq/sparc-us2e-cpufreq.c: cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
drivers/cpufreq/sparc-us2e-cpufreq.c: set_cpus_allowed_ptr(current, &cpus_allowed);
drivers/cpufreq/sparc-us2e-cpufreq.c: cpumask_t cpus_allowed;
drivers/cpufreq/sparc-us2e-cpufreq.c: cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
drivers/cpufreq/sparc-us2e-cpufreq.c: set_cpus_allowed_ptr(current, &cpus_allowed);
drivers/cpufreq/sparc-us3-cpufreq.c: cpumask_t cpus_allowed;
drivers/cpufreq/sparc-us3-cpufreq.c: cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
drivers/cpufreq/sparc-us3-cpufreq.c: set_cpus_allowed_ptr(current, &cpus_allowed);
drivers/cpufreq/sparc-us3-cpufreq.c: cpumask_t cpus_allowed;
drivers/cpufreq/sparc-us3-cpufreq.c: cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
drivers/cpufreq/sparc-us3-cpufreq.c: set_cpus_allowed_ptr(current, &cpus_allowed);
drivers/crypto/n2_core.c: cpumask_copy(old_allowed, &current->cpus_allowed);
drivers/infiniband/hw/qib/qib_file_ops.c: const unsigned int weight = cpumask_weight(&current->cpus_allowed);
drivers/infiniband/hw/qib/qib_file_ops.c: const unsigned int cpu = cpumask_first(&current->cpus_allowed);
drivers/infiniband/hw/qib/qib_file_ops.c: cpumask_weight(&current->cpus_allowed);
fs/proc/array.c: cpumask_pr_args(&task->cpus_allowed));
fs/proc/array.c: cpumask_pr_args(&task->cpus_allowed));
include/linux/init_task.h: .cpus_allowed = CPU_MASK_ALL, \
include/linux/sched.h: cpumask_t cpus_allowed;
include/linux/sched.h:/* Future-safe accessor for struct task_struct's cpus_allowed. */
include/linux/sched.h:#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
include/linux/sched.h:#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
kernel/sched/core.c: * __migrate_task() such that we will not miss enforcing cpus_allowed
kernel/sched/core.c: cpumask_copy(&p->cpus_allowed, new_mask);
kernel/sched/core.c: if (cpumask_equal(&p->cpus_allowed, new_mask))
kernel/sched/core.c: * ->cpus_allowed is protected by both rq->lock and p->pi_lock
kernel/sched/core.c: * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
kernel/sched/core.c: * to rely on ttwu() to place the task on a valid ->cpus_allowed
kernel/sched/core.c: * - cpus_allowed can change in the fork path
kernel/sched/core.c: if (!cpumask_subset(span, &p->cpus_allowed) ||
kernel/sched/core.c: cpumask_var_t cpus_allowed, new_mask;
kernel/sched/core.c: if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
kernel/sched/core.c: cpuset_cpus_allowed(p, cpus_allowed);
kernel/sched/core.c: cpumask_and(new_mask, in_mask, cpus_allowed);
kernel/sched/core.c: cpuset_cpus_allowed(p, cpus_allowed);
kernel/sched/core.c: if (!cpumask_subset(new_mask, cpus_allowed)) {
kernel/sched/core.c: * update. Just reset the cpus_allowed to the
kernel/sched/core.c: * cpuset's cpus_allowed
kernel/sched/core.c: cpumask_copy(new_mask, cpus_allowed);
kernel/sched/core.c: free_cpumask_var(cpus_allowed);
kernel/sched/core.c: cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
kernel/sched/core.c: * before cpus_allowed may be changed.
kernel/sched/core.c: * Rules for changing task_struct::cpus_allowed are holding
kernel/sched/fair.c: * 2) cannot be migrated to this CPU due to cpus_allowed, or
kernel/sched/fair.c: * isn't true due to cpus_allowed constraints and the like.
kernel/trace/trace_hwlat.c: if (!cpumask_equal(current_mask, &current->cpus_allowed))
kernel/workqueue.c: * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any
kernel/workqueue.c: * online CPU before, cpus_allowed of all its workers should be restored.
net/sunrpc/svc.c: * Set the given thread's cpus_allowed mask so that it

By turning ->cpus_allowed into a pointer we add a small amount of indirection, but
not in any true scheduler fast-path, and doing this would be preferable to -rt as
well as it self-maintains this notion in a way that will benefit -rt.

Note that we already pass around cpus_allowed by pointer in a few cases, so the
indirection cost should be pretty minimal.

Thanks,

Ingo