[PATCH v2 3/4] task: With a grace period after finish_task_switch, remove unnecessary code

From: Eric W. Biederman
Date: Sat Sep 14 2019 - 08:39:15 EST



Remove work arounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch.

In particular now that there tasks exiting the runqueue exprience
a rcu grace period none of the work performed by task_rcu_dereference
excpet the rcu_dereference is necessary so replace task_rcu_dereference
with rcu_dereference.

Remove the code in rcuwait_wait_event that checks to ensure the current
task has not exited. It is no longer necessary as it is guaranteed
that any running task will experience a rcu grace period after it
leaves the run queueue.

Remove the comment in rcuwait_wake_up as it is no longer relevant.

Cc: Davidlohr Bueso <dave@xxxxxxxxxxxx>
Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
---
include/linux/rcuwait.h | 20 +++---------
include/linux/sched/task.h | 1 -
kernel/exit.c | 67 --------------------------------------
kernel/sched/fair.c | 2 +-
kernel/sched/membarrier.c | 4 +--
5 files changed, 7 insertions(+), 87 deletions(-)

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@

/*
* rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
*
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
*/
struct rcuwait {
struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
*/
#define rcuwait_wait_event(w, condition) \
({ \
- /* \
- * Complain if we are called after do_exit()/exit_notify(), \
- * as we cannot rely on the rcu critical region for the \
- * wakeup side. \
- */ \
- WARN_ON(current->exit_state); \
- \
rcu_assign_pointer((w)->task, current); \
for (;;) { \
/* \
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 4c44c37236b2..8bd51af44bf8 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -115,7 +115,6 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}

-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
void put_task_struct_rcu_user(struct task_struct *task);

#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e259286f4e7..f943773622fc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ void release_task(struct task_struct *p)
goto repeat;
}

-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
- struct sighand_struct *sighand;
- struct task_struct *task;
-
- /*
- * We need to verify that release_task() was not called and thus
- * delayed_put_task_struct() can't run and drop the last reference
- * before rcu_read_unlock(). We check task->sighand != NULL,
- * but we can read the already freed and reused memory.
- */
-retry:
- task = rcu_dereference(*ptask);
- if (!task)
- return NULL;
-
- probe_kernel_address(&task->sighand, sighand);
-
- /*
- * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
- * was already freed we can not miss the preceding update of this
- * pointer.
- */
- smp_rmb();
- if (unlikely(task != READ_ONCE(*ptask)))
- goto retry;
-
- /*
- * We've re-checked that "task == *ptask", now we have two different
- * cases:
- *
- * 1. This is actually the same task/task_struct. In this case
- * sighand != NULL tells us it is still alive.
- *
- * 2. This is another task which got the same memory for task_struct.
- * We can't know this of course, and we can not trust
- * sighand != NULL.
- *
- * In this case we actually return a random value, but this is
- * correct.
- *
- * If we return NULL - we can pretend that we actually noticed that
- * *ptask was updated when the previous task has exited. Or pretend
- * that probe_slab_address(&sighand) reads NULL.
- *
- * If we return the new task (because sighand is not NULL for any
- * reason) - this is fine too. This (new) task can't go away before
- * another gp pass.
- *
- * And note: We could even eliminate the false positive if re-read
- * task->sighand once again to avoid the falsely NULL. But this case
- * is very unlikely so we don't care.
- */
- if (!sighand)
- return NULL;
-
- return task;
-}
-
void rcuwait_wake_up(struct rcuwait *w)
{
struct task_struct *task;
@@ -316,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
*/
smp_mb(); /* (B) */

- /*
- * Avoid using task_rcu_dereference() magic as long as we are careful,
- * see comment in rcuwait_wait_event() regarding ->exit_state.
- */
task = rcu_dereference(w->task);
if (task)
wake_up_process(task);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bc9cfeaac8bd..215c640e2a6b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1644,7 +1644,7 @@ static void task_numa_compare(struct task_numa_env *env,
return;

rcu_read_lock();
- cur = task_rcu_dereference(&dst_rq->curr);
+ cur = rcu_dereference(dst_rq->curr);
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
cur = NULL;

diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..b14250a11608 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -71,7 +71,7 @@ static int membarrier_global_expedited(void)
continue;

rcu_read_lock();
- p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+ p = rcu_dereference(cpu_rq(cpu)->curr);
if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
if (!fallback)
@@ -150,7 +150,7 @@ static int membarrier_private_expedited(int flags)
if (cpu == raw_smp_processor_id())
continue;
rcu_read_lock();
- p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+ p = rcu_dereference(cpu_rq(cpu)->curr);
if (p && p->mm == current->mm) {
if (!fallback)
__cpumask_set_cpu(cpu, tmpmask);
--
2.21.0.dirty