Re: [PATCH v3] stop_machine: Make migration_cpu_stop() do useful work for CONFIG_PREEMPT_NONE

From: Peter Zijlstra
Date: Tue Sep 13 2016 - 12:37:41 EST


On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:

> Me too, and I failed to find something which could be broken... So
> perhaps we should make it a nop and investigate the new bug reports after
> that.

Works for me :-)

>
> Hmm. And preempt_enable_no_resched_notrace() under TASK_DEAD in
> __schedule() should be removed it seems, do_exit() can call __schedule()
> directly.


something like so?

---

include/linux/kernel.h | 2 +-
include/linux/sched.h | 2 ++
kernel/exit.c | 11 ++---------
kernel/sched/core.c | 23 ++++++++++++-----------
4 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a6118d26a..e5bd9cdd2e24 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -266,7 +266,7 @@ extern void oops_enter(void);
extern void oops_exit(void);
void print_oops_end_marker(void);
extern int oops_may_print(void);
-void do_exit(long error_code)
+void __noreturn do_exit(long error_code)
__noreturn;
void complete_and_exit(struct completion *, long)
__noreturn;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eb64fcd89e68..b0c818a05b2e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
}

+void __noreturn do_task_dead(void);
+
struct nsproxy;
struct user_namespace;

diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78be3b09..d4c12692f766 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
static inline void check_stack_usage(void) {}
#endif

-void do_exit(long code)
+void __noreturn do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
@@ -897,14 +897,7 @@ void do_exit(long code)
smp_mb();
raw_spin_unlock_wait(&tsk->pi_lock);

- /* causes final put_task_struct in finish_task_switch(). */
- tsk->state = TASK_DEAD;
- tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
- schedule();
- BUG();
- /* Avoid "noreturn function does return". */
- for (;;)
- cpu_relax(); /* For when BUG is null */
+ do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a0086a5fc008..6034f269000f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
rq = cpu_rq(cpu);
prev = rq->curr;

- /*
- * do_exit() calls schedule() with preemption disabled as an exception;
- * however we must fix that up, otherwise the next task will see an
- * inconsistent (higher) preempt count.
- *
- * It also avoids the below schedule_debug() test from complaining
- * about this.
- */
- if (unlikely(prev->state == TASK_DEAD))
- preempt_enable_no_resched_notrace();
-
schedule_debug(prev);

if (sched_feat(HRTICK))
@@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
balance_callback(rq);
}

+void __noreturn do_task_dead(void)
+{
+ /* causes final put_task_struct in finish_task_switch(). */
+ __set_current_state(TASK_DEAD);
+ current->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
+ __schedule(false);
+ BUG();
+ /* Avoid "noreturn function does return". */
+ for (;;)
+ cpu_relax(); /* For when BUG is null */
+}
+
static inline void sched_submit_work(struct task_struct *tsk)
{
if (!tsk->state || tsk_is_pi_blocked(tsk))