Re: [PATCH 1/1] x86/vmemmap: Use direct-mapped VA instead of vmemmap-based VA
From: Harry Yoo (Oracle)
Date: Tue Feb 18 2025 - 00:27:44 EST
On Mon, Feb 17, 2025 at 01:41:33PM +0200, Gwan-gyeong Mun wrote:
> When a process triggers the addition of struct pages to the vmemmap
> (e.g. on memory hot-plug), the page table entries for the newly added
> vmemmap-based virtual addresses are installed in init_mm's page table
> first and synchronized to other page tables later.
>
> If the vmemmap-based virtual address is accessed through the process's
> page table before this sync, a page fault will occur.
>
> Fix this by translating the vmemmap-based virtual address to its
> direct-mapped virtual address and using that instead, whenever a
> vmemmap-based virtual address is accessed before the sync while the
> current top-level page table is not init_mm's.
>
> Fixes: faf1c0008a33 ("x86/vmemmap: optimize for consecutive sections in partial populated PMDs")
> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun@xxxxxxxxx>
> Cc: Oscar Salvador <osalvador@xxxxxxx>
> Cc: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>
I think this fix is reasonable, and it doesn't change the existing code
too much. Any thoughts from the x86 folks?
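
For anyone following along, the failing sequence as I read the current
code (hand-traced, so please double-check; the other two memset sites
the patch touches are analogous):

  vmemmap_populate()                        /* x86-64 */
    vmemmap_populate_hugepages()
      vmemmap_set_pmd()
        vmemmap_use_new_sub_pmd()
          memset((void *)page, ...)         /* vmemmap-based VA: faults if
                                               the current PGD lacks the
                                               new entries */
    sync_global_pgds()                      /* runs only after the memsets
                                               above have been done */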
> Cc: Byungchul Park <byungchul@xxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Shouldn't we add Cc: <stable@xxxxxxxxxxxxxxx>?
--
Harry
> ---
> arch/x86/mm/init_64.c | 17 ++++++++++++++---
> 1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 01ea7c6df303..1cb26f692831 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -844,6 +844,17 @@ void __init paging_init(void)
> */
> static unsigned long unused_pmd_start __meminitdata;
>
> +static void * __meminit vmemmap_va_to_kaddr(unsigned long vmemmap_va)
> +{
> + void *kaddr = (void *)vmemmap_va;
> + pgd_t *pgd = __va(read_cr3_pa());
> +
> + if (init_mm.pgd != pgd)
> + kaddr = __va(slow_virt_to_phys(kaddr));
> +
> + return kaddr;
> +}
> +
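
Nit: it might be worth a comment on the helper explaining why falling
back to the direct map is safe. Something along these lines (this is
just my understanding of it, so please correct me if it's off):

static void * __meminit vmemmap_va_to_kaddr(unsigned long vmemmap_va)
{
	void *kaddr = (void *)vmemmap_va;
	/* Top-level page table the CPU is currently running on. */
	pgd_t *pgd = __va(read_cr3_pa());

	/*
	 * New vmemmap mappings are installed in init_mm's page table
	 * first and synced to other page tables only later. If we are
	 * not running on init_mm's page table, the vmemmap-based VA
	 * may not be mapped here yet; resolve the physical address
	 * through init_mm instead (slow_virt_to_phys() walks the
	 * kernel page table) and use the direct-mapped VA.
	 */
	if (init_mm.pgd != pgd)
		kaddr = __va(slow_virt_to_phys(kaddr));

	return kaddr;
}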
> static void __meminit vmemmap_flush_unused_pmd(void)
> {
> if (!unused_pmd_start)
> @@ -851,7 +862,7 @@ static void __meminit vmemmap_flush_unused_pmd(void)
> /*
> * Clears (unused_pmd_start, PMD_END]
> */
> - memset((void *)unused_pmd_start, PAGE_UNUSED,
> + memset(vmemmap_va_to_kaddr(unused_pmd_start), PAGE_UNUSED,
> ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
> unused_pmd_start = 0;
> }
> @@ -882,7 +893,7 @@ static void __meminit __vmemmap_use_sub_pmd(unsigned long start)
> * case the first memmap never gets initialized e.g., because the memory
> * block never gets onlined).
> */
> - memset((void *)start, 0, sizeof(struct page));
> + memset(vmemmap_va_to_kaddr(start), 0, sizeof(struct page));
> }
>
> static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
> @@ -924,7 +935,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
> * Mark with PAGE_UNUSED the unused parts of the new memmap range
> */
> if (!IS_ALIGNED(start, PMD_SIZE))
> - memset((void *)page, PAGE_UNUSED, start - page);
> + memset(vmemmap_va_to_kaddr(page), PAGE_UNUSED, start - page);
>
> /*
> * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
> --
> 2.48.1
>