[PATCH v7 4/5] sched: Handle set_cpus_allowed_ptr() & sched_setaffinity() race
From: Waiman Long
Date: Fri Sep 02 2022 - 11:39:32 EST
Racing is possible between set_cpus_allowed_ptr() and sched_setaffinity()
or between multiple sched_setaffinity() calls from different
CPUs. To resolve these race conditions, we need to update both
user_cpus_ptr and cpus_mask in a single lock critical section instead
of in separate ones. This requires moving the user_cpus_ptr update to
set_cpus_allowed_common().
The SCA_USER flag will be used to signify that a user_cpus_ptr update
will have to be done. A pointer to the new user mask will be put into
the percpu variable pending_user_mask at the beginning of the lock
critical section. The pending user mask will then be taken up in
set_cpus_allowed_common().
Ideally, user_cpus_ptr should only be updated if the sched_setaffinity()
call is successful. However, this patch will update user_cpus_ptr as soon
as the first call to __set_cpus_allowed_ptr() is successful. If there
is a race between sched_setaffinity() and a cpuset update, the subsequent
calls to __set_cpus_allowed_ptr() may fail, but user_cpus_ptr will
still have been updated. A warning will be printed in this
corner case.
Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
kernel/sched/core.c | 59 ++++++++++++++++++++++++++++-----------------
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84544daf3839..618341d0fa51 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -111,6 +111,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+DEFINE_PER_CPU(struct cpumask **, pending_user_mask);
#ifdef CONFIG_SCHED_DEBUG
/*
@@ -2199,6 +2200,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32
static int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask,
+ struct cpumask **puser_mask,
u32 flags);
static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
@@ -2249,7 +2251,8 @@ void migrate_enable(void)
*/
preempt_disable();
if (p->cpus_ptr != &p->cpus_mask)
- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
+ __set_cpus_allowed_ptr(p, &p->cpus_mask, NULL,
+ SCA_MIGRATE_ENABLE);
/*
* Mustn't clear migration_disabled() until cpus_ptr points back at the
* regular cpus_mask, otherwise things that race (eg.
@@ -2538,6 +2541,12 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
+
+ /*
+ * Swap in the new user_cpus_ptr if SCA_USER flag set
+ */
+ if (flags & SCA_USER)
+ swap(p->user_cpus_ptr, *__this_cpu_read(pending_user_mask));
}
static void
@@ -2926,12 +2935,19 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
* call is not atomic; no spinlocks may be held.
*/
static int __set_cpus_allowed_ptr(struct task_struct *p,
- const struct cpumask *new_mask, u32 flags)
+ const struct cpumask *new_mask,
+ struct cpumask **puser_mask,
+ u32 flags)
{
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(p, &rf);
+ /*
+ * CPU won't be preempted or interrupted while holding task_rq_lock().
+ */
+ __this_cpu_write(pending_user_mask, puser_mask);
+
if (p->user_cpus_ptr && !(flags & SCA_USER) &&
cpumask_and(rq->scratch_mask, new_mask, p->user_cpus_ptr))
new_mask = rq->scratch_mask;
@@ -2941,7 +2957,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
- return __set_cpus_allowed_ptr(p, new_mask, 0);
+ return __set_cpus_allowed_ptr(p, new_mask, NULL, 0);
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
@@ -3032,7 +3048,8 @@ void force_compatible_cpus_allowed_ptr(struct task_struct *p)
}
static int
-__sched_setaffinity(struct task_struct *p, const struct cpumask *mask, int flags);
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask,
+ struct cpumask **puser_mask, int flags);
/*
* Restore the affinity of a task @p which was previously restricted by a
@@ -3049,7 +3066,7 @@ void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
* Try to restore the old affinity mask with __sched_setaffinity().
* Cpuset masking will be done there too.
*/
- ret = __sched_setaffinity(p, task_user_cpus(p), 0);
+ ret = __sched_setaffinity(p, task_user_cpus(p), NULL, 0);
WARN_ON_ONCE(ret);
}
@@ -3529,6 +3546,7 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask,
+ struct cpumask **puser_mask,
u32 flags)
{
return set_cpus_allowed_ptr(p, new_mask);
@@ -8053,7 +8071,8 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
#endif
static int
-__sched_setaffinity(struct task_struct *p, const struct cpumask *mask, int flags)
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask,
+ struct cpumask **puser_mask, int flags)
{
int retval;
cpumask_var_t cpus_allowed, new_mask;
@@ -8072,8 +8091,10 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask, int flags
retval = dl_task_check_affinity(p, new_mask);
if (retval)
goto out_free_new_mask;
+
+ retval = __set_cpus_allowed_ptr(p, new_mask, puser_mask,
+ SCA_CHECK | flags);
again:
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | flags);
if (retval)
goto out_free_new_mask;
@@ -8084,6 +8105,14 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask, int flags
* Just reset the cpumask to the cpuset's cpus_allowed.
*/
cpumask_copy(new_mask, cpus_allowed);
+ retval = __set_cpus_allowed_ptr(p, new_mask, NULL, SCA_CHECK);
+
+ /*
+ * Warn if the first __set_cpus_allowed_ptr() call already
+ * swapped in user_cpus_ptr (SCA_USER set) but this fallback
+ * call failed, leaving user_cpus_ptr updated on an error path.
+ */
+ WARN_ON_ONCE(retval && (flags & SCA_USER));
goto again;
}
@@ -8138,21 +8167,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
}
cpumask_copy(user_mask, in_mask);
- retval = __sched_setaffinity(p, in_mask, SCA_USER);
-
- /*
- * Save in_mask into user_cpus_ptr after a successful
- * __sched_setaffinity() call. pi_lock is used to synchronize
- * changes to user_cpus_ptr.
- */
- if (!retval) {
- unsigned long flags;
-
- /* Use pi_lock to synchronize changes to user_cpus_ptr */
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- swap(p->user_cpus_ptr, user_mask);
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
- }
+ retval = __sched_setaffinity(p, in_mask, &user_mask, SCA_USER);
kfree(user_mask);
out_put_task:
--
2.31.1