[PATCH] mm: Return faster for non-fatal signals in user mode faults

From: Peter Xu
Date: Wed Oct 31 2018 - 21:55:29 EST


The idea comes from the upstream discussion between Linus and Andrea:

https://lore.kernel.org/lkml/20171102193644.GB22686@xxxxxxxxxx/

A summary to the issue: there was a special path in handle_userfault()
in the past that we'll return a VM_FAULT_NOPAGE when we detected
non-fatal signals when waiting for userfault handling. We did that by
reacquiring the mmap_sem before returning. However that brings a risk
in that the vmas might have changed when we retake the mmap_sem and
even we could be holding an invalid vma structure.

This patch is a preparation of removing that special path by allowing
the page fault to return even faster if we were interrupted by a
non-fatal signal during a user-mode page fault handling routine.

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Suggested-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
arch/alpha/mm/fault.c | 2 +-
arch/arc/mm/fault.c | 5 +++++
arch/arm/mm/fault.c | 5 +++++
arch/arm64/mm/fault.c | 4 ++++
arch/hexagon/mm/vm_fault.c | 2 +-
arch/ia64/mm/fault.c | 2 +-
arch/m68k/mm/fault.c | 2 +-
arch/microblaze/mm/fault.c | 2 +-
arch/mips/mm/fault.c | 2 +-
arch/nds32/mm/fault.c | 5 +++++
arch/nios2/mm/fault.c | 2 +-
arch/openrisc/mm/fault.c | 2 +-
arch/parisc/mm/fault.c | 2 +-
arch/powerpc/mm/fault.c | 2 ++
arch/riscv/mm/fault.c | 4 ++--
arch/s390/mm/fault.c | 3 +--
arch/sh/mm/fault.c | 4 ++++
arch/sparc/mm/fault_32.c | 2 +-
arch/sparc/mm/fault_64.c | 2 +-
arch/um/kernel/trap.c | 4 +++-
arch/unicore32/mm/fault.c | 4 ++--
arch/x86/mm/fault.c | 2 ++
arch/xtensa/mm/fault.c | 2 +-
include/linux/sched/signal.h | 14 ++++++++++++++
24 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index de4cc6936391..fcfa229cc1e7 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -150,7 +150,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
the fault. */
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 61919e4e4eec..27adf4e608e4 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -142,6 +142,11 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
goto no_context;
return;
}
+
+ /* Allow user to handle non-fatal signals first */
+ if (signal_pending(current) && user_mode(regs))
+ return;
+
/*
* retry state machine
*/
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2ae28ffec622..44fa64dbb8e0 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -301,6 +301,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
return 0;
}

+ /* Fast path to handle user mode signals */
+ if ((fault & VM_FAULT_RETRY) && user_mode(regs) &&
+ signal_pending(current))
+ return 0;
+
/*
* Major/minor page fault accounting is only done on the
* initial attempt. If we go through a retry, it is extremely
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 613e7434c208..8ff04af1d982 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -490,6 +490,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
return 0;
}

+ /* Fast path for user mode signals */
+ if (user_mode(regs) && signal_pending(current))
+ return 0;
+
/*
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
* starvation.
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 223787e01bdd..d9e15d941bdb 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -91,7 +91,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)

fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

/* The most common case -- we are done. */
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d039b846f671..b5aa4e80c762 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -141,7 +141,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
*/
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 8e734309ace9..182799fd9987 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -138,7 +138,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
fault = handle_mm_fault(vma, address, flags);
pr_debug("handle_mm_fault returns %x\n", fault);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return 0;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 45c9f66c1dbc..32da02778a63 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -217,7 +217,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
*/
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 6660b77ff8f3..f811ef5efb80 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -154,7 +154,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
*/
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(regs))
return;

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index a40de112a23a..c4c53de68ec3 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -216,6 +216,11 @@ void do_page_fault(unsigned long entry, unsigned long addr,
return;
}

+ /* Fast path for user mode signals */
+ if ((fault & VM_FAULT_RETRY) && signal_pending(current) &&
+ user_mode(regs))
+ return;
+
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index a401b45cae47..c38bea4220fb 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -133,7 +133,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
*/
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index fd1592a56238..30d5c51e9d40 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -161,7 +161,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,

fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 355e3e13fa72..8e88e5c5f26a 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -304,7 +304,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,

fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 408ee769c470..d321a6c5fe62 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -596,6 +596,8 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
*/
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
+ if (is_user && signal_pending(current))
+ return 0;
if (!fatal_signal_pending(current))
goto retry;
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index deeb820bd855..ba652a2a6ad9 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -111,11 +111,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
fault = handle_mm_fault(vma, addr, flags);

/*
- * If we need to retry but a fatal signal is pending, handle the
+ * If we need to retry but a signal is pending, try to handle the
* signal first. We do not need to release the mmap_sem because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 74a77b2bca75..551ac311bd35 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -480,8 +480,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
- /* No reason to continue if interrupted by SIGKILL. */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
goto out_up;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index becf0be267bb..f620282a37fd 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -489,6 +489,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
* have already released it in __lock_page_or_retry
* in mm/filemap.c.
*/
+
+ if (user_mode(regs) && signal_pending(tsk))
+ return;
+
goto retry;
}
}
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 0863f6fdd2c5..447f61c4e996 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -237,7 +237,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
*/
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index a1cba3eef79e..6807fba66331 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -421,7 +421,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)

fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
goto exit_exception;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index bc2756782d64..32a6830c1d45 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -76,7 +76,9 @@ int handle_page_fault(unsigned long address, unsigned long ip,

fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if ((fault & VM_FAULT_RETRY) &&
+ (fatal_signal_pending(current) ||
+ (is_user && signal_pending(current))))
goto out_nosemaphore;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index 60453c892c51..f24a1967c323 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -246,11 +246,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)

fault = __do_pf(mm, addr, fsr, flags, tsk);

- /* If we need to retry but a fatal signal is pending, handle the
+ /* If we need to retry but a signal is pending, try to handle the
* signal first. We do not need to release the mmap_sem because
* it would already be released in __lock_page_or_retry in
* mm/filemap.c. */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return 0;

if (!(fault & VM_FAULT_ERROR) && (flags & FAULT_FLAG_ALLOW_RETRY)) {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 994c860ac2d8..f7836472961e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1451,6 +1451,8 @@ void do_user_addr_fault(struct pt_regs *regs,
if (flags & FAULT_FLAG_ALLOW_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
+ if ((flags & FAULT_FLAG_USER) && signal_pending(tsk))
+ return;
if (!fatal_signal_pending(tsk))
goto retry;
}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index d2b082908538..6467e3fb9762 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -110,7 +110,7 @@ void do_page_fault(struct pt_regs *regs)
*/
fault = handle_mm_fault(vma, address, flags);

- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index efd8ce7675ed..3f517a30ec4d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -377,6 +377,20 @@ static inline int signal_pending_state(long state, struct task_struct *p)
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}

+/*
+ * This should only be used in fault handlers to decide whether we
+ * should stop the current fault routine to handle the signals
+ * instead. It should normally be used when a signal interrupted a
+ * page fault which can lead to a VM_FAULT_RETRY.
+ */
+static inline bool fault_signal_pending(unsigned int fault_flags,
+ struct pt_regs *regs)
+{
+ return (fault_flags & VM_FAULT_RETRY) &&
+ (fatal_signal_pending(current) ||
+ (user_mode(regs) && signal_pending(current)));
+}
+
/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
--
2.21.0


--C7zPtVaVf+AK4Oqc--