Re: [PATCH v6 2/5] sched: Use user_cpus_ptr for saving user provided cpumask in sched_setaffinity()
From: Peter Zijlstra
Date: Wed Aug 31 2022 - 05:12:54 EST
On Thu, Aug 25, 2022 at 09:01:16PM -0400, Waiman Long wrote:
> void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
> {
> - struct cpumask *user_mask = p->user_cpus_ptr;
> - unsigned long flags;
> -
> /*
> - * Try to restore the old affinity mask. If this fails, then
> - * we free the mask explicitly to avoid it being inherited across
> - * a subsequent fork().
> + * Try to restore the old affinity mask with __sched_setaffinity().
> + * Cpuset masking will be done there too.
> */
> - if (!user_mask || !__sched_setaffinity(p, user_mask))
> - return;
> -
> - raw_spin_lock_irqsave(&p->pi_lock, flags);
> - user_mask = clear_user_cpus_ptr(p);
> - raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> -
> - kfree(user_mask);
> + __sched_setaffinity(p, task_user_cpus(p), false);
> }
We have an issue with __sched_setaffinity() failing here. I'm not sure
ignoring the failure is the right thing -- but I'm also not entirely
sure what is.
> void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
> @@ -8081,10 +8046,11 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
> #endif
>
> static int
> -__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
> +__sched_setaffinity(struct task_struct *p, const struct cpumask *mask, bool save_mask)
> {
> int retval;
> cpumask_var_t cpus_allowed, new_mask;
> + struct cpumask *user_mask = NULL;
>
> if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL))
> return -ENOMEM;
> @@ -8100,8 +8066,22 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
> retval = dl_task_check_affinity(p, new_mask);
> if (retval)
> goto out_free_new_mask;
> +
> + /*
> + * Save the user requested mask internally now and then update
> + * user_cpus_ptr later after making sure this call will be
> + * successful, i.e. retval == 0.
> + */
> + if (save_mask) {
> + user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
> + if (!user_mask) {
> + retval = -ENOMEM;
> + goto out_free_new_mask;
> + }
> + cpumask_copy(user_mask, mask);
> + }
> again:
> - retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER);
> + retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
> if (retval)
> goto out_free_new_mask;
>
> @@ -8115,7 +8095,16 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
> goto again;
> }
>
> + if (save_mask) {
> + unsigned long flags;
> +
> + /* Use pi_lock to synchronize changes to user_cpus_ptr */
> + raw_spin_lock_irqsave(&p->pi_lock, flags);
> + swap(p->user_cpus_ptr, user_mask);
> + raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> + }
> out_free_new_mask:
> + kfree(user_mask);
> free_cpumask_var(new_mask);
> out_free_cpus_allowed:
> free_cpumask_var(cpus_allowed);
I'm confused as to why it's put in this function and not in the one
caller that actually sets the new @save_mask to true, here:
> @@ -8158,7 +8147,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
> if (retval)
> goto out_put_task;
>
> - retval = __sched_setaffinity(p, in_mask);
> + retval = __sched_setaffinity(p, in_mask, true);
> out_put_task:
> put_task_struct(p);
> return retval;