[RFC PATCH] x86/mm/fault: Inline page fault paths to reduce kernel text size

From: Pekka Enberg
Date: Tue Aug 25 2020 - 15:11:37 EST


From: Pekka Enberg <penberg@xxxxxxxxxx>

The commit 92181f190b649f7ef2b79cbf5c00f26ccc66da2a ("x86: optimise
x86's do_page_fault (C entry point for the page fault path)") from 2009
showed significant stack savings when infrequent page fault handling
paths were moved out of line with the "noinline" annotation, at the cost
of some increase in kernel text size.

However, a decade of GCC improvements (and changes in the code) has
eliminated such wins in stack usage:

With "noinline":

0000000000000b30 <do_user_addr_fault>:
b30: 41 57 push %r15
b32: 41 56 push %r14
b34: 41 55 push %r13
b36: 49 89 d5 mov %rdx,%r13
b39: 41 54 push %r12
b3b: 49 89 fc mov %rdi,%r12
b3e: 55 push %rbp
b3f: 48 89 f5 mov %rsi,%rbp
b42: 53 push %rbx
b43: 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
b4a: 00 00
b4c: 48 83 ec 18 sub $0x18,%rsp

With "inline":

00000000000008a0 <do_user_addr_fault>:
8a0: 41 57 push %r15
8a2: 41 56 push %r14
8a4: 41 55 push %r13
8a6: 49 89 d5 mov %rdx,%r13
8a9: 41 54 push %r12
8ab: 49 89 fc mov %rdi,%r12
8ae: 55 push %rbp
8af: 48 89 f5 mov %rsi,%rbp
8b2: 53 push %rbx
8b3: 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
8ba: 00 00
8bc: 48 83 ec 18 sub $0x18,%rsp

So all we're left with is the increase in kernel text, which goes away
again when the paths are inlined (old = "noinline", new = "inline"):

add/remove: 1/5 grow/shrink: 2/1 up/down: 1049/-1144 (-95)
Function old new delta
spurious_kernel_fault.part - 559 +559
do_user_addr_fault 988 1446 +458
do_kern_addr_fault 103 135 +32
bad_area_nosemaphore 13 - -13
bad_area 66 - -66
pgtable_bad 107 - -107
mm_fault_error 199 - -199
bad_area_access_error 226 - -226
spurious_kernel_fault 553 20 -533
Total: Before=7359, After=7264, chg -1.29%

Therefore, let's inline the page fault handling paths to reduce kernel
text size.

Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Signed-off-by: Pekka Enberg <penberg@xxxxxxxxxx>
---
arch/x86/mm/fault.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..b8893684d80c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -533,7 +533,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
dump_pagetable(address);
}

-static noinline void
+static inline void
pgtable_bad(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
@@ -577,7 +577,7 @@ static void set_signal_archinfo(unsigned long address,
tsk->thread.cr2 = address;
}

-static noinline void
+static inline void
no_context(struct pt_regs *regs, unsigned long error_code,
unsigned long address, int signal, int si_code)
{
@@ -788,7 +788,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
no_context(regs, error_code, address, SIGSEGV, si_code);
}

-static noinline void
+static inline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
@@ -809,7 +809,7 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}

-static noinline void
+static inline void
bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
__bad_area(regs, error_code, address, 0, SEGV_MAPERR);
@@ -832,7 +832,7 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
return false;
}

-static noinline void
+static inline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct vm_area_struct *vma)
{
@@ -905,7 +905,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}

-static noinline void
+static inline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, vm_fault_t fault)
{
@@ -971,7 +971,7 @@ static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
* See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
* (Optional Invalidation).
*/
-static noinline int
+static inline int
spurious_kernel_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
--
2.26.2