Re: [PATCH] mm: avoid zeroing user movable page twice with init_on_alloc=1

From: Zi Yan
Date: Thu Dec 05 2024 - 12:33:19 EST


On 5 Dec 2024, at 3:19, Geert Uytterhoeven wrote:

> Hi Zi,
>
> On Wed, Dec 4, 2024 at 5:58 PM Zi Yan <ziy@xxxxxxxxxx> wrote:
>> On 4 Dec 2024, at 11:29, Matthew Wilcox wrote:
>>> On Wed, Dec 04, 2024 at 11:16:51AM -0500, Zi Yan wrote:
>>>>> So maybe the clearing done as part of page allocator isn't enough here.
>>>>>
>>>> Basically, mips needs to flush data cache if kmap address is aliased to
>>>
>>> People use "aliased" in contronym ways. Do you mean "has a
>>> non-congruent alias" or "has a congruent alias"?
>>
>> I mean if kmap address goes into a different cache line than userspace
>> address, a cache flush is needed to make sure data is visible to
>> userspace.
>>
>>>
>>>> userspace address. This means when mips has THP on, the patch below
>>>> is not enough to fix the issue.
>>>>
>>>> In post_alloc_hook(), it does not make sense to pass userspace address
>>>> in to determine whether to flush dcache or not.
>>>>
>>>> One way to fix it is to add something like arch_userpage_post_alloc()
>>>> to flush dcache if kmap address is aliased to userspace address.
>>>> But my questions are that
>>>> 1) if kmap address will always be the same for two separate kmap_local() calls,
>>>
>>> No. It just takes the next address in the stack.
>>
>> So this fix will not work, since it is possible that first kmap and second
>> kmap have different pages_do_alias() return values.
>>
>> Another way would be to make a special case for mips, like below.
>> But that looks ugly, let me think about it more.
>>
>> diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
>> index bc3e3484c1bf..ef3c6f0b9159 100644
>> --- a/arch/mips/include/asm/page.h
>> +++ b/arch/mips/include/asm/page.h
>> @@ -95,6 +95,19 @@ struct vm_area_struct;
>> extern void copy_user_highpage(struct page *to, struct page *from,
>> unsigned long vaddr, struct vm_area_struct *vma);
>>
>> +struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
>> + unsigned long vaddr)
>> + {
>> + struct folio *folio;
>> +
>> + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr);
>> + if (folio)
>> + clear_user_highpage(&folio->page, vaddr);
>> +
>> + return folio;
>> + }
>> +#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
>> +
>> #define __HAVE_ARCH_COPY_USER_HIGHPAGE
>>
>> /*
>> diff --git a/mm/internal.h b/mm/internal.h
>> index cb8d8e8e3ffa..d513fa683aa3 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -1287,7 +1287,8 @@ void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
>>
>> static inline bool alloc_zeroed(void)
>> {
>> - return static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
>> + return !IS_ENABLED(CONFIG_MIPS) &&
>> + static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
>> &init_on_alloc);
>> }
>
> After adding a missing static inline, #include <linux/gfp.h>, and still
> getting compile failures, I gave up...

Sorry about that.

Can you try the patch below (it compiles locally for mips and x86) to see
if your issue is fixed? Can you please set THP to always-on in your config,
since THP is also affected by the same issue? The patch you tested only
fixed the non-THP configuration.

Thanks. I appreciate your help. :)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 6e452bd8e7e3..d9beb8371daa 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -224,7 +224,13 @@ static inline
struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr)
{
- return vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr);
+ struct folio *folio;
+
+ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr);
+ if (folio && alloc_need_zeroing())
+ clear_user_highpage(&folio->page, vaddr);
+
+ return folio;
}
#endif

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c39c4945946c..6ac0308c4380 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4175,6 +4175,23 @@ static inline int do_mseal(unsigned long start, size_t len_in, unsigned long fla
}
#endif

+/*
+ * alloc_need_zeroing checks if a user folio from page allocator needs to be
+ * zeroed or not.
+ */
+static inline bool alloc_need_zeroing(void)
+{
+ /*
+ * for user folios, arch with cache aliasing requires cache flush and
+ * arc sets folio->flags, so always return true to make caller use
+ * clear_user_page()/clear_user_highpage()
+ */
+ return (IS_ENABLED(CONFIG_ARCH_HAS_CPU_CACHE_ALIASING) ||
+ IS_ENABLED(CONFIG_ARC)) ||
+ !static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
+ &init_on_alloc);
+}
+
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status);
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee335d96fc39..107130a5413a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1176,11 +1176,12 @@ static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma,
folio_throttle_swaprate(folio, gfp);

/*
- * When a folio is not zeroed during allocation (__GFP_ZERO not used),
- * folio_zero_user() is used to make sure that the page corresponding
- * to the faulting address will be hot in the cache after zeroing.
+ * When a folio is not zeroed during allocation (__GFP_ZERO not used)
+ * or user folios require special handling, folio_zero_user() is used to
+ * make sure that the page corresponding to the faulting address will be
+ * hot in the cache after zeroing.
*/
- if (!alloc_zeroed())
+ if (alloc_need_zeroing())
folio_zero_user(folio, addr);
/*
* The memory barrier inside __folio_mark_uptodate makes sure that
diff --git a/mm/internal.h b/mm/internal.h
index cb8d8e8e3ffa..3bd08bafad04 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1285,12 +1285,6 @@ void touch_pud(struct vm_area_struct *vma, unsigned long addr,
void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, bool write);

-static inline bool alloc_zeroed(void)
-{
- return static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
- &init_on_alloc);
-}
-
/*
* Parses a string with mem suffixes into its order. Useful to parse kernel
* parameters.
diff --git a/mm/memory.c b/mm/memory.c
index 75c2dfd04f72..cf1611791856 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4733,12 +4733,12 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
folio_throttle_swaprate(folio, gfp);
/*
* When a folio is not zeroed during allocation
- * (__GFP_ZERO not used), folio_zero_user() is used
- * to make sure that the page corresponding to the
- * faulting address will be hot in the cache after
- * zeroing.
+ * (__GFP_ZERO not used) or user folios require special
+ * handling, folio_zero_user() is used to make sure
+ * that the page corresponding to the faulting address
+ * will be hot in the cache after zeroing.
*/
- if (!alloc_zeroed())
+ if (alloc_need_zeroing())
folio_zero_user(folio, vmf->address);
return folio;
}



Best Regards,
Yan, Zi