Re: [PATCH v11 11/15] arm64: kexec_file: add crash dump support

From: James Morse
Date: Wed Jul 18 2018 - 12:50:34 EST


Hi Akashi,

On 11/07/18 08:41, AKASHI Takahiro wrote:
> Enabling crash dump (kdump) includes
> * prepare contents of ELF header of a core dump file, /proc/vmcore,
> using crash_prepare_elf64_headers(), and
> * add two device tree properties, "linux,usable-memory-range" and
> "linux,elfcorehdr", which represent respectively a memory range
> to be used by crash dump kernel and the header's location

> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> index 69333694e3e2..eeb5766928b0 100644
> --- a/arch/arm64/include/asm/kexec.h
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -99,6 +99,10 @@ static inline void crash_post_resume(void) {}
> struct kimage_arch {
> phys_addr_t dtb_mem;
> void *dtb_buf;
> + /* Core ELF header buffer */

> + void *elf_headers;

Shouldn't this be a phys_addr_t if it comes from kbuf.mem?
(dtb_mem is, and the type tells us which way round the runtime/kexec-time
pointers are)


> + unsigned long elf_headers_sz;
> + unsigned long elf_load_addr;
> };
>
> /**


> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
> index a0b44fe18b95..261564df7210 100644
> --- a/arch/arm64/kernel/machine_kexec_file.c
> +++ b/arch/arm64/kernel/machine_kexec_file.c
> @@ -132,6 +173,45 @@ static int setup_dtb(struct kimage *image,
> return ret;
> }
>
> +static int prepare_elf_headers(void **addr, unsigned long *sz)
> +{
> + struct crash_mem *cmem;
> + unsigned int nr_ranges;
> + int ret;
> + u64 i;
> + phys_addr_t start, end;

> + nr_ranges = 1; /* for exclusion of crashkernel region */
> + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, 0,
> + &start, &end, NULL)

Nit: flags = MEMBLOCK_NONE? Just to make it obvious this is how MEMBLOCK_NOMAP
regions are weeded out.

This is going to get interesting if we ever support hotpluggable memory... but
it works for now and implicitly removes the nomap regions.


> + nr_ranges++;

> +
> + cmem = kmalloc(sizeof(struct crash_mem) +
> + sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL);
> + if (!cmem)
> + return -ENOMEM;
> +
> + cmem->max_nr_ranges = nr_ranges;
> + cmem->nr_ranges = 0;
> + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, 0,
> + &start, &end, NULL) {
> + cmem->ranges[cmem->nr_ranges].start = start;
> + cmem->ranges[cmem->nr_ranges].end = end - 1;
> + cmem->nr_ranges++;
> + }
> +
> + /* Exclude crashkernel region */
> + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);


> + if (ret)
> + goto out;
> +
> + ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
> +
> +out:

Nit: You could save the goto if you wrote this as:
| if (!ret)
| ret = crash_prepare_elf64_headers(cmem, true, addr, sz);


> + kfree(cmem);
> + return ret;
> +}
> +
> int load_other_segments(struct kimage *image,
> unsigned long kernel_load_addr,
> unsigned long kernel_size,
> @@ -139,11 +219,43 @@ int load_other_segments(struct kimage *image,
> char *cmdline, unsigned long cmdline_len)
> {
> struct kexec_buf kbuf;
> + void *hdrs_addr;
> + unsigned long hdrs_sz;
> unsigned long initrd_load_addr = 0;
> char *dtb = NULL;
> unsigned long dtb_len = 0;
> int ret = 0;
>
> + /* load elf core header */
> + if (image->type == KEXEC_TYPE_CRASH) {
> + ret = prepare_elf_headers(&hdrs_addr, &hdrs_sz);
> + if (ret) {
> + pr_err("Preparing elf core header failed\n");
> + goto out_err;
> + }
> +
> + kbuf.image = image;
> + kbuf.buffer = hdrs_addr;
> + kbuf.bufsz = hdrs_sz;
> + kbuf.memsz = hdrs_sz;

> + kbuf.buf_align = PAGE_SIZE;

Whose PAGE_SIZE?

Won't this break if the kdump kernel is 64K pages, but the first kernel uses 4K?
Should we change this to the largest supported PAGE_SIZE: SZ_64K?


> + kbuf.buf_min = crashk_res.start;
> + kbuf.buf_max = crashk_res.end + 1;
> + kbuf.top_down = true;
> +
> + ret = kexec_add_buffer(&kbuf);
> + if (ret) {
> + vfree(hdrs_addr);
> + goto out_err;
> + }
> + image->arch.elf_headers = hdrs_addr;
> + image->arch.elf_headers_sz = hdrs_sz;
> + image->arch.elf_load_addr = kbuf.mem;
> +
> + pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
> + image->arch.elf_load_addr, hdrs_sz, hdrs_sz);
> + }
> +
> kbuf.image = image;
> /* not allocate anything below the kernel */
> kbuf.buf_min = kernel_load_addr + kernel_size;


I think the initramfs can escape the crash kernel range because you add to the
buf_max region:
| /* within 1GB-aligned window of up to 32GB in size */
| kbuf.buf_max = round_down(kernel_load_addr, SZ_1G)
|                + (unsigned long)SZ_1G * 32;


I think we need a helper to clamp these min/max ranges to within the crash
kernel range, as it needs doing in a few places.


Thanks,

James