[BUG] from x86: Support kmap_local() forced debugging

From: Steven Rostedt
Date: Wed Jan 06 2021 - 18:02:48 EST


On Wed, 18 Nov 2020 20:48:41 +0100
Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:

> kmap_local() and related interfaces are NOOPs on 64bit and only create
> temporary fixmaps for highmem pages on 32bit. That means the test coverage
> for this code is pretty small.
>
> CONFIG_KMAP_LOCAL can be enabled independently of CONFIG_HIGHMEM, which
> makes it possible to support enforced kmap_local() debugging even on
> 64bit.
>
> For 32bit the support is unconditional; for 64bit it's only supported when
> CONFIG_NR_CPUS <= 4096, as supporting it for 8192 CPUs would require setting
> up yet another fixmap PGT.
>
> If CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is enabled then kmap_local()/kmap_atomic()
> will use the temporary fixmap mapping path.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> V4: New patch
> ---
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/fixmap.h | 12 +++++++++---
> arch/x86/include/asm/pgtable_64_types.h | 6 +++++-
> 3 files changed, 15 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -93,6 +93,7 @@ config X86
> select ARCH_SUPPORTS_ACPI
> select ARCH_SUPPORTS_ATOMIC_RMW
> select ARCH_SUPPORTS_NUMA_BALANCING if X86_64

I triggered the following crash on x86_32 simply by running this command
(after ssh'ing into the box):

# head -100 /tmp/output-file

Here, /tmp/output-file was the output of a trace-cmd report.
Even after rebooting and without running any tracing code, simply running
the head command still crashed.

BUG: unable to handle page fault for address: fff58000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
*pdpt = 0000000006de9001 *pde = 0000000001968063 *pte = 0000000000000000
Oops: 0000 [#1] SMP PTI
CPU: 3 PID: 3935 Comm: sshd Not tainted 5.11.0-rc2-test+ #2
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
EIP: skb_copy_bits+0x10c/0x1b9
Code: 3b 5d e8 0f 47 5d e8 c7 45 e0 00 00 00 00 8b 7d e0 39 7d e8 76 3a 8b 45 d4 e8 a4 e4 ff ff 8b 55 e4 03 55 e0 89 d9 01 c6 89 d7 <f3> a4 e8 c9 e4 ff ff 01 5d e0 8b 5d e8 b8 00 10 00 00 2b 5d e0 83
EAX: fff57000 EBX: 000005a8 ECX: 000000f8 EDX: c77b9900
ESI: fff58000 EDI: c77b9db0 EBP: c6de39ec ESP: c6de39c0
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00210286
CR0: 80050033 CR2: fff58000 CR3: 06de6000 CR4: 001506f0
Call Trace:
skb_segment+0x4a3/0x828
? __tcp_mtu_to_mss+0x2d/0x6b
tcp_gso_segment+0xf6/0x336
? list_add+0x26/0x26
tcp4_gso_segment+0x77/0x7c
? tcp_gso_segment+0x336/0x336
inet_gso_segment+0x1a1/0x2df
? inet_unregister_protosw+0x5e/0x5e
skb_mac_gso_segment+0xb9/0x107
__skb_gso_segment+0xdf/0x10f
? netif_skb_features+0x1ca/0x24a
? __qdisc_run+0x1e4/0x418
validate_xmit_skb.constprop.0+0x10f/0x1ad
validate_xmit_skb_list+0x25/0x45
sch_direct_xmit+0x5c/0x19d
__qdisc_run+0x3e3/0x418
? qdisc_run_begin+0x53/0x5d
qdisc_run+0x26/0x30
__dev_queue_xmit+0x2bd/0x524
? mark_held_locks+0x40/0x51
dev_queue_xmit+0xf/0x11
ip_finish_output2+0x378/0x3d7
__ip_finish_output+0xd6/0xe2
ip_output+0x8c/0xbb
? ip_mc_output+0x18d/0x18d
dst_output+0x27/0x2d
ip_local_out+0x2b/0x30
__ip_queue_xmit+0x32e/0x38e
? __copy_skb_header+0x4b/0x98
? __ip_queue_xmit+0x38e/0x38e
ip_queue_xmit+0x16/0x1b
__tcp_transmit_skb+0x731/0x794
tcp_transmit_skb+0x16/0x18
tcp_write_xmit+0x7b4/0xa90
__tcp_push_pending_frames+0x2c/0x6b
tcp_push+0x8c/0xf1
tcp_sendmsg_locked+0x74a/0x7f2
? tcp_sendmsg_locked+0x7f2/0x7f2
tcp_sendmsg+0x27/0x38
? tcp_sendmsg_locked+0x7f2/0x7f2
inet_sendmsg+0x3c/0x5f
? inet_send_prepare+0x3b/0x3b
sock_sendmsg_nosec+0x1a/0x2d
sock_sendmsg+0x25/0x29
sock_write_iter+0x84/0xa7
vfs_write+0xf5/0x19b
ksys_write+0x68/0xaa
__ia32_sys_write+0x15/0x17
__do_fast_syscall_32+0x66/0x76
do_fast_syscall_32+0x29/0x5b
do_SYSENTER_32+0x15/0x17
entry_SYSENTER_32+0x9f/0xf2
EIP: 0xb7ee3545
Code: c4 01 10 03 03 74 c0 01 10 05 03 74 b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
EAX: ffffffda EBX: 00000003 ECX: 01d12448 EDX: 00002028
ESI: 00002028 EDI: 01d12448 EBP: bff4e388 ESP: bff4e328
DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00200246
? asm_exc_nmi+0xc5/0x2ab
Modules linked in: ppdev parport_pc parport
CR2: 00000000fff58000
---[ end trace 3d4582614c9c2a0e ]---
EIP: skb_copy_bits+0x10c/0x1b9
Code: 3b 5d e8 0f 47 5d e8 c7 45 e0 00 00 00 00 8b 7d e0 39 7d e8 76 3a 8b 45 d4 e8 a4 e4 ff ff 8b 55 e4 03 55 e0 89 d9 01 c6 89 d7 <f3> a4 e8 c9 e4 ff ff 01 5d e0 8b 5d e8 b8 00 10 00 00 2b 5d e0 83
EAX: fff57000 EBX: 000005a8 ECX: 000000f8 EDX: c77b9900
ESI: fff58000 EDI: c77b9db0 EBP: c6de39ec ESP: c6de39c0
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00210286
CR0: 80050033 CR2: fff58000 CR3: 06de6000 CR4: 001506f0
Kernel panic - not syncing: Fatal exception in interrupt
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

This was against 5.11-rc2.

I bisected it down to the commit that added this patch.

> + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096

If I remove the above line, it works fine.

Attached is the config file.

-- Steve

> select ARCH_USE_BUILTIN_BSWAP
> select ARCH_USE_QUEUED_RWLOCKS
> select ARCH_USE_QUEUED_SPINLOCKS
> --- a/arch/x86/include/asm/fixmap.h
> +++ b/arch/x86/include/asm/fixmap.h
> @@ -14,13 +14,20 @@
> #ifndef _ASM_X86_FIXMAP_H
> #define _ASM_X86_FIXMAP_H
>
> +#include <asm/kmap_size.h>
> +
> /*
> * Exposed to assembly code for setting up initial page tables. Cannot be
> * calculated in assembly code (fixmap entries are an enum), but is sanity
> * checked in the actual fixmap C code to make sure that the fixmap is
> * covered fully.
> */
> -#define FIXMAP_PMD_NUM 2
> +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
> +# define FIXMAP_PMD_NUM 2
> +#else
> +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512))
> +# define FIXMAP_PMD_NUM (KM_PMDS + 2)
> +#endif
> /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
> #define FIXMAP_PMD_TOP 507
>
> @@ -31,7 +38,6 @@
> #include <asm/pgtable_types.h>
> #ifdef CONFIG_X86_32
> #include <linux/threads.h>
> -#include <asm/kmap_size.h>
> #else
> #include <uapi/asm/vsyscall.h>
> #endif
> @@ -92,7 +98,7 @@ enum fixed_addresses {
> FIX_IO_APIC_BASE_0,
> FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
> #endif
> -#ifdef CONFIG_X86_32
> +#ifdef CONFIG_KMAP_LOCAL
> FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
> FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
> #ifdef CONFIG_PCI_MMCONFIG
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -143,7 +143,11 @@ extern unsigned int ptrs_per_p4d;
>
> #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
> /* The module sections ends with the start of the fixmap */
> -#define MODULES_END _AC(0xffffffffff000000, UL)
> +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
> +# define MODULES_END _AC(0xffffffffff000000, UL)
> +#else
> +# define MODULES_END _AC(0xfffffffffe000000, UL)
> +#endif
> #define MODULES_LEN (MODULES_END - MODULES_VADDR)
>
> #define ESPFIX_PGD_ENTRY _AC(-2, UL)
>
>

Attachment: config
Description: Binary data