Re: [PATCH v4 03/30] kho: drop notifiers

From: Pratyush Yadav

Date: Tue Oct 07 2025 - 08:10:08 EST


On Mon, Oct 06 2025, Pasha Tatashin wrote:

> On Mon, Oct 6, 2025 at 1:01 PM Pratyush Yadav <pratyush@xxxxxxxxxx> wrote:
>>
>> On Mon, Sep 29 2025, Pasha Tatashin wrote:
>>
>> > From: "Mike Rapoport (Microsoft)" <rppt@xxxxxxxxxx>
>> >
>> > The KHO framework uses a notifier chain as the mechanism for clients to
>> > participate in the finalization process. While this works for a single,
>> > central state machine, it is too restrictive for kernel-internal
>> > components like pstore/reserve_mem or IMA. These components need a
>> > simpler, direct way to register their state for preservation (e.g.,
>> > during their initcall) without being part of a complex,
>> > shutdown-time notifier sequence. The notifier model forces all
>> > participants into a single finalization flow and makes direct
>> > preservation from an arbitrary context difficult.
>> > This patch refactors the client participation model by removing the
>> > notifier chain and introducing a direct API for managing FDT subtrees.
>> >
>> > The core kho_finalize() and kho_abort() state machine remains, but
>> > clients now register their data with KHO beforehand.
>> >
>> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@xxxxxxxxxx>
>> > Signed-off-by: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx>
>> [...]
>> > diff --git a/mm/memblock.c b/mm/memblock.c
>> > index e23e16618e9b..c4b2d4e4c715 100644
>> > --- a/mm/memblock.c
>> > +++ b/mm/memblock.c
>> > @@ -2444,53 +2444,18 @@ int reserve_mem_release_by_name(const char *name)
>> > #define MEMBLOCK_KHO_FDT "memblock"
>> > #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1"
>> > #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1"
>> > -static struct page *kho_fdt;
>> > -
>> > -static int reserve_mem_kho_finalize(struct kho_serialization *ser)
>> > -{
>> > - int err = 0, i;
>> > -
>> > - for (i = 0; i < reserved_mem_count; i++) {
>> > - struct reserve_mem_table *map = &reserved_mem_table[i];
>> > - struct page *page = phys_to_page(map->start);
>> > - unsigned int nr_pages = map->size >> PAGE_SHIFT;
>> > -
>> > - err |= kho_preserve_pages(page, nr_pages);
>> > - }
>> > -
>> > - err |= kho_preserve_folio(page_folio(kho_fdt));
>> > - err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
>> > -
>> > - return notifier_from_errno(err);
>> > -}
>> > -
>> > -static int reserve_mem_kho_notifier(struct notifier_block *self,
>> > - unsigned long cmd, void *v)
>> > -{
>> > - switch (cmd) {
>> > - case KEXEC_KHO_FINALIZE:
>> > - return reserve_mem_kho_finalize((struct kho_serialization *)v);
>> > - case KEXEC_KHO_ABORT:
>> > - return NOTIFY_DONE;
>> > - default:
>> > - return NOTIFY_BAD;
>> > - }
>> > -}
>> > -
>> > -static struct notifier_block reserve_mem_kho_nb = {
>> > - .notifier_call = reserve_mem_kho_notifier,
>> > -};
>> >
>> > static int __init prepare_kho_fdt(void)
>> > {
>> > int err = 0, i;
>> > + struct page *fdt_page;
>> > void *fdt;
>> >
>> > - kho_fdt = alloc_page(GFP_KERNEL);
>> > - if (!kho_fdt)
>> > + fdt_page = alloc_page(GFP_KERNEL);
>> > + if (!fdt_page)
>> > return -ENOMEM;
>> >
>> > - fdt = page_to_virt(kho_fdt);
>> > + fdt = page_to_virt(fdt_page);
>> >
>> > err |= fdt_create(fdt, PAGE_SIZE);
>> > err |= fdt_finish_reservemap(fdt);
>> > @@ -2499,7 +2464,10 @@ static int __init prepare_kho_fdt(void)
>> > err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
>> > for (i = 0; i < reserved_mem_count; i++) {
>> > struct reserve_mem_table *map = &reserved_mem_table[i];
>> > + struct page *page = phys_to_page(map->start);
>> > + unsigned int nr_pages = map->size >> PAGE_SHIFT;
>> >
>> > + err |= kho_preserve_pages(page, nr_pages);
>> > err |= fdt_begin_node(fdt, map->name);
>> > err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
>> > err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
>> > @@ -2507,13 +2475,14 @@ static int __init prepare_kho_fdt(void)
>> > err |= fdt_end_node(fdt);
>> > }
>> > err |= fdt_end_node(fdt);
>> > -
>> > err |= fdt_finish(fdt);
>> >
>> > + err |= kho_preserve_folio(page_folio(fdt_page));
>> > + err |= kho_add_subtree(MEMBLOCK_KHO_FDT, fdt);
>> > +
>> > if (err) {
>> > pr_err("failed to prepare memblock FDT for KHO: %d\n", err);
>> > - put_page(kho_fdt);
>> > - kho_fdt = NULL;
>> > + put_page(fdt_page);
>>
>> This adds the subtree to KHO even if the FDT might be invalid, and
>> then leaves a dangling reference in KHO to the FDT in case of an
>> error. I think you should either do this check after
>> kho_preserve_folio(page_folio(fdt_page)) and do a clean error check
>> for kho_add_subtree(), or call kho_remove_subtree() in the error
>> block.
>
> I agree, I do not like these err |= stuff, we should be checking
> errors cleanly, and do proper clean-ups.

Yeah, this is mainly a byproduct of using FDTs. Getting and setting
simple properties also needs error checking, and that can get tedious
very quickly — which is why this pattern has shown up, I suppose.

>
>> I prefer the former since if kho_add_subtree() is the one that fails,
>> there is little sense in removing a subtree that was never added.
>>
>> > }
>> >
>> > return err;
>> > @@ -2529,13 +2498,6 @@ static int __init reserve_mem_init(void)
>> > err = prepare_kho_fdt();
>> > if (err)
>> > return err;
>> > -
>> > - err = register_kho_notifier(&reserve_mem_kho_nb);
>> > - if (err) {
>> > - put_page(kho_fdt);
>> > - kho_fdt = NULL;
>> > - }
>> > -
>> > return err;
>> > }
>> > late_initcall(reserve_mem_init);
>>
>> --
>> Regards,
>> Pratyush Yadav

--
Regards,
Pratyush Yadav