Re: [PATCH v3 5/5] RISC-V: Add crash kernel support

From: Palmer Dabbelt
Date: Thu Apr 22 2021 - 23:31:01 EST


On Mon, 05 Apr 2021 01:57:12 PDT (-0700), mick@xxxxxxxxxxxx wrote:
From: Nick Kossifidis <mickflemm@xxxxxxxxx>

This doesn't match the SOB. I just fixed it up. IIUC that's not generally the right way to go, but since it came from the right address I'm OK with it this time.


This patch allows Linux to act as a crash kernel for use with
kdump. Userspace will let the crash kernel know about the
memory region it can use through linux,usable-memory property
on the /memory node (overriding its reg property), and about the
memory region where the elf core header of the previous kernel
is saved, through a reserved-memory node with a compatible string
of "linux,elfcorehdr". This approach is the least invasive and
re-uses functionality already present.

I tested this on riscv64 qemu and it works as expected, you
may test it by retrieving the dmesg of the previous kernel
through /proc/vmcore, using the vmcore-dmesg utility from
kexec-tools.

v3:
* Rebase

v2:
* Use linux,usable-memory on /memory instead of a new binding
* Use a reserved-memory node for ELF core header

Signed-off-by: Nick Kossifidis <mick@xxxxxxxxxxxx>
---
arch/riscv/Kconfig | 10 ++++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/crash_dump.c | 46 ++++++++++++++++++++++++++++++++++
arch/riscv/kernel/setup.c | 12 +++++++++
arch/riscv/mm/init.c | 33 ++++++++++++++++++++++++
5 files changed, 102 insertions(+)
create mode 100644 arch/riscv/kernel/crash_dump.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3716262ef..553c2dced 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -403,6 +403,16 @@ config KEXEC

The name comes from the similarity to the exec system call.

+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate crash dump after being started by kexec. This should
+ be normally only set in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/admin-guide/kdump/kdump.rst

endmenu

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 07f676ad3..bd66d2ce0 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -59,6 +59,7 @@ endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-${CONFIG_KEXEC} += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o

obj-$(CONFIG_JUMP_LABEL) += jump_label.o

diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c
new file mode 100644
index 000000000..86cc0ada5
--- /dev/null
+++ b/arch/riscv/kernel/crash_dump.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code comes from arch/arm64/kernel/crash_dump.c
+ * Created by: AKASHI Takahiro <takahiro.akashi@xxxxxxxxxx>
+ * Copyright (C) 2017 Linaro Limited
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset,
+ int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!vaddr)
+ return -ENOMEM;
+
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+ memunmap(vaddr);
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, vaddr + offset, csize);
+
+ memunmap(vaddr);
+ return csize;
+}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 31866dce9..ff398a3d8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -66,6 +66,9 @@ static struct resource code_res = { .name = "Kernel code", };
static struct resource data_res = { .name = "Kernel data", };
static struct resource rodata_res = { .name = "Kernel rodata", };
static struct resource bss_res = { .name = "Kernel bss", };
+#ifdef CONFIG_CRASH_DUMP
+static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
+#endif

static int __init add_resource(struct resource *parent,
struct resource *res)
@@ -169,6 +172,15 @@ static void __init init_resources(void)
}
#endif

+#ifdef CONFIG_CRASH_DUMP
+ if (elfcorehdr_size > 0) {
+ elfcorehdr_res.start = elfcorehdr_addr;
+ elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1;
+ elfcorehdr_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ add_resource(&iomem_resource, &elfcorehdr_res);
+ }
+#endif
+
for_each_reserved_mem_region(region) {
res = &mem_res[res_idx--];

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index e71b35cec..f66011816 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -13,6 +13,7 @@
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
@@ -606,6 +607,18 @@ static void __init reserve_crashkernel(void)

int ret = 0;

+ /*
+ * Don't reserve a region for a crash kernel on a crash kernel
+ * since it doesn't make much sense and we have limited memory
+ * resources.
+ */
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ pr_info("crashkernel: ignoring reservation request\n");
+ return;
+ }
+#endif
+
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
if (ret || !crash_size)
@@ -660,6 +673,26 @@ static void __init reserve_crashkernel(void)
}
#endif /* CONFIG_KEXEC_CORE */

+#ifdef CONFIG_CRASH_DUMP
+/*
+ * We keep track of the ELF core header of the crashed
+ * kernel with a reserved-memory region with compatible
+ * string "linux,elfcorehdr". Here we register a callback
+ * to populate elfcorehdr_addr/size when this region is
+ * present. Note that this region will be marked as
+ * reserved once we call early_init_fdt_scan_reserved_mem()
+ * later on.
+ */
+static int elfcore_hdr_setup(struct reserved_mem *rmem)
+{
+ elfcorehdr_addr = rmem->base;
+ elfcorehdr_size = rmem->size;
+ return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(elfcorehdr, "linux,elfcorehdr", elfcore_hdr_setup);
+#endif
+
void __init paging_init(void)
{
setup_vm_final();