Re: [PATCHv6 04/13] kexec_file: Use bpf-prog to decompose image
From: Philipp Rudo
Date: Thu Feb 26 2026 - 08:42:58 EST
Hi Pingfan,
On Mon, 19 Jan 2026 11:24:15 +0800
Pingfan Liu <piliu@xxxxxxxxxx> wrote:
[...]
> diff --git a/kernel/kexec_bpf_loader.c b/kernel/kexec_bpf_loader.c
> new file mode 100644
> index 0000000000000..dc59e1389da94
> --- /dev/null
> +++ b/kernel/kexec_bpf_loader.c
> @@ -0,0 +1,161 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Kexec image bpf section helpers
> + *
> + * Copyright (C) 2025, 2026 Red Hat, Inc
> + */
> +
> +#define pr_fmt(fmt) "kexec_file(Image): " fmt
> +
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/vmalloc.h>
> +#include <linux/kexec.h>
> +#include <linux/elf.h>
> +#include <linux/string.h>
> +#include <linux/bpf.h>
> +#include <linux/filter.h>
> +#include <asm/byteorder.h>
> +#include <asm/image.h>
> +#include <asm/memory.h>
> +#include "kexec_internal.h"
> +
> +/* Load a ELF */
> +static int arm_bpf_prog(char *bpf_elf, unsigned long sz)
> +{
> + return 0;
> +}
> +
> +static void disarm_bpf_prog(void)
> +{
> +}
> +
> +struct kexec_context {
> + bool kdump;
> + char *kernel;
> + int kernel_sz;
> + char *initrd;
> + int initrd_sz;
> + char *cmdline;
> + int cmdline_sz;
> +};
> +
> +void kexec_image_parser_anchor(struct kexec_context *context,
> + unsigned long parser_id);
> +
> +/*
> + * optimize("O0") prevents inline, compiler constant propagation
> + *
> + * Let bpf be the program context pointer so that it will not be spilled into
> + * stack.
> + */
> +__attribute__((used, optimize("O0"))) void kexec_image_parser_anchor(
> + struct kexec_context *context,
> + unsigned long parser_id)
> +{
> + /*
> + * To prevent linker from Identical Code Folding (ICF) with kexec_image_parser_anchor,
> + * making them have different code.
> + */
> + volatile int dummy = 0;
> +
> + dummy += 1;
> +}
> +
> +
> +BTF_KFUNCS_START(kexec_modify_return_ids)
> +BTF_ID_FLAGS(func, kexec_image_parser_anchor, KF_SLEEPABLE)
> +BTF_KFUNCS_END(kexec_modify_return_ids)
> +
> +static const struct btf_kfunc_id_set kexec_modify_return_set = {
> + .owner = THIS_MODULE,
> + .set = &kexec_modify_return_ids,
> +};
> +
> +static int __init kexec_bpf_prog_run_init(void)
> +{
> + return register_btf_fmodret_id_set(&kexec_modify_return_set);
> +}
> +late_initcall(kexec_bpf_prog_run_init);
> +
> +static int kexec_buff_parser(struct bpf_parser_context *parser)
> +{
> + return 0;
> +}
> +
> +/* At present, only PE format file with .bpf section is supported */
> +#define file_has_bpf_section pe_has_bpf_section
> +#define file_get_section pe_get_section
> +
> +int decompose_kexec_image(struct kimage *image, int extended_fd)
> +{
> + struct kexec_context context = { 0 };
> + struct bpf_parser_context *bpf;
> + unsigned long kernel_sz, bpf_sz;
> + char *kernel_start, *bpf_start;
> + int ret = 0;
> +
> + if (image->type != KEXEC_TYPE_CRASH)
> + context.kdump = false;
> + else
> + context.kdump = true;
> +
> + kernel_start = image->kernel_buf;
> + kernel_sz = image->kernel_buf_len;
> +
> + while (file_has_bpf_section(kernel_start, kernel_sz)) {
> +
> + bpf = alloc_bpf_parser_context(kexec_buff_parser, &context);
> + if (!bpf)
> + return -ENOMEM;
> + file_get_section((const char *)kernel_start, ".bpf", &bpf_start, &bpf_sz);
> + if (!!bpf_sz) {
> + /* load and attach bpf-prog */
> + ret = arm_bpf_prog(bpf_start, bpf_sz);
> + if (ret) {
> + put_bpf_parser_context(bpf);
> + pr_err("Fail to load .bpf section\n");
> + goto err;
> + }
> + }
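I'm not sure this works as intended. If a .bpf section exists but
bpf_sz is 0, the function skips arming the bpf-prog but still
continues: it calls kexec_image_parser_anchor() with no program
attached and then disarm_bpf_prog(). AFAICS context.kernel is left
unchanged in that case, so the next iteration sees the very same
buffer and the loop makes no progress. That doesn't look right to me.
IIUC a zero-size bpf-prog should be an error. Or am I missing
something?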
Thanks
Philipp
> + context.kernel = kernel_start;
> + context.kernel_sz = kernel_sz;
> + /* bpf-prog fentry, which handle above buffers. */
> + kexec_image_parser_anchor(&context, (unsigned long)bpf);
> +
> + /*
> + * Container may be nested and should be unfold one by one.
> + * The former bpf-prog should prepare 'kernel', 'initrd',
> + * 'cmdline' for the next phase by calling kexec_buff_parser()
> + */
> + kernel_start = context.kernel;
> + kernel_sz = context.kernel_sz;
> +
> + /*
> + * detach the current bpf-prog from their attachment points.
> + */
> + disarm_bpf_prog();
> + put_bpf_parser_context(bpf);
> + }
> +
> + /*
> + * image's kernel_buf, initrd_buf, cmdline_buf are set. Now they should
> + * be updated to the new content.
> + */
> + image->kernel_buf = context.kernel;
> + image->kernel_buf_len = context.kernel_sz;
> + image->initrd_buf = context.initrd;
> + image->initrd_buf_len = context.initrd_sz;
> + image->cmdline_buf = context.cmdline;
> + image->cmdline_buf_len = context.cmdline_sz;
> +
> + return 0;
> +err:
> + vfree(context.kernel);
> + vfree(context.initrd);
> + vfree(context.cmdline);
> + return ret;
> +}
> +
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index 0222d17072d40..f9674bb5bd8db 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -238,7 +238,14 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
> goto out;
> #endif
>
> - /* Call arch image probe handlers */
> + if (IS_ENABLED(CONFIG_KEXEC_BPF))
> + decompose_kexec_image(image, initrd_fd);
> +
> + /*
> + * From this point, the kexec subsystem handle the kernel boot protocol.
> + *
> + * Call arch image probe handlers
> + */
> ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
> image->kernel_buf_len);
> if (ret)
> diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
> index 8e5e5c1237732..ee01d0c8bb377 100644
> --- a/kernel/kexec_internal.h
> +++ b/kernel/kexec_internal.h
> @@ -39,6 +39,7 @@ extern size_t kexec_purgatory_size;
> extern bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz);
> extern int pe_get_section(const char *file_buf, const char *sect_name,
> char **sect_start, unsigned long *sect_sz);
> +extern int decompose_kexec_image(struct kimage *image, int extended_fd);
> #else /* CONFIG_KEXEC_FILE */
> static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
> #endif /* CONFIG_KEXEC_FILE */