Re: [PATCH 1/3] powerpc/mm: fix a warning when a cache is common to PGD and hugepages

From: Christophe LEROY
Date: Tue Aug 14 2018 - 09:27:49 EST




Le 13/08/2018 Ã 15:44, Aneesh Kumar K.V a ÃcritÂ:
On 08/13/2018 06:57 PM, Christophe Leroy wrote:
While implementing TLB miss HW assistance on the 8xx, the following
warning was encountered:

[Â 423.732965] WARNING: CPU: 0 PID: 345 at mm/slub.c:2412 ___slab_alloc.constprop.30+0x26c/0x46c
[Â 423.733033] CPU: 0 PID: 345 Comm: mmap Not tainted 4.18.0-rc8-00664-g2dfff9121c55 #671
[Â 423.733075] NIP:Â c0108f90 LR: c0109ad0 CTR: 00000004
[Â 423.733121] REGS: c455bba0 TRAP: 0700ÂÂ Not tainted (4.18.0-rc8-00664-g2dfff9121c55)
[Â 423.733147] MSR:Â 00021032 <ME,IR,DR,RI>Â CR: 24224848Â XER: 20000000
[Â 423.733319]
[Â 423.733319] GPR00: c0109ad0 c455bc50 c4521910 c60053c0 007080c0 c0011b34 c7fa41e0 c455be30
[Â 423.733319] GPR08: 00000001 c00103a0 c7fa41e0 c49afcc4 24282842 10018840 c079b37c 00000040
[Â 423.733319] GPR16: 73f00000 00210d00 00000000 00000001 c455a000 00000100 00000200 c455a000
[Â 423.733319] GPR24: c60053c0 c0011b34 007080c0 c455a000 c455a000 c7fa41e0 00000000 00009032
[Â 423.734190] NIP [c0108f90] ___slab_alloc.constprop.30+0x26c/0x46c
[Â 423.734257] LR [c0109ad0] kmem_cache_alloc+0x210/0x23c
[Â 423.734283] Call Trace:
[Â 423.734326] [c455bc50] [00000100] 0x100 (unreliable)
[Â 423.734430] [c455bcc0] [c0109ad0] kmem_cache_alloc+0x210/0x23c
[Â 423.734543] [c455bcf0] [c0011b34] huge_pte_alloc+0xc0/0x1dc
[Â 423.734633] [c455bd20] [c01044dc] hugetlb_fault+0x408/0x48c
[Â 423.734720] [c455bdb0] [c0104b20] follow_hugetlb_page+0x14c/0x44c
[Â 423.734826] [c455be10] [c00e8e54] __get_user_pages+0x1c4/0x3dc
[Â 423.734919] [c455be80] [c00e9924] __mm_populate+0xac/0x140
[Â 423.735020] [c455bec0] [c00db14c] vm_mmap_pgoff+0xb4/0xb8
[Â 423.735127] [c455bf00] [c00f27c0] ksys_mmap_pgoff+0xcc/0x1fc
[Â 423.735222] [c455bf40] [c000e0f8] ret_from_syscall+0x0/0x38
[Â 423.735271] Instruction dump:
[Â 423.735321] 7cbf482e 38fd0008 7fa6eb78 7fc4f378 4bfff5dd 7fe3fb78 4bfffe24 81370010
[Â 423.735536] 71280004 41a2ff88 4840c571 4bffff80 <0fe00000> 4bfffeb8 81340010 712a0004
[Â 423.735757] ---[ end trace e9b222919a470790 ]---

This warning occurs when calling kmem_cache_zalloc() on a
cache having a constructor.

In this case it happens because PGD cache and 512k hugepte cache are
the same size (4k). While a cache with constructor is created for
the PGD, hugepages create cache without constructor and uses
kmem_cache_zalloc(). As both expect a cache with the same size,
the hugepages reuse the cache created for PGD, hence the conflict.

As the constructors only aim at zeroing the allocated memory, this
patch fixes this issue by removing the constructors and using
kmem_cache_zalloc() instead.


But that means we zero out on each alloc from the slab right? Earlier we allocated we we added memory to the slab. Also we have code that carefully zero things out when we free the page table back to slab.
The idea there was, it is better take the cost of zeroing out during free rather than fault.

Ok, then it means we have to do it the other way round and change
hugetlb to use cache with constructors as well.

At first look, it seems quite tricky to do it though. The constructor doesn't get the size of the cache, so it means we need one constructor for each possible size.

Christophe



Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h | 2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 ++--
 arch/powerpc/include/asm/nohash/32/pgalloc.h | 2 +-
 arch/powerpc/include/asm/nohash/64/pgalloc.h | 6 +++---
 arch/powerpc/mm/init-common.c | 21 +++------------------
 5 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..4c23cc1ae7a1 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -32,7 +32,7 @@ extern struct kmem_cache *pgtable_cache[];

 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-ÂÂÂ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ÂÂÂ return kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
ÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
 }

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 76234a14b97d..074359cd632a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -81,7 +81,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
ÂÂÂÂÂ if (radix_enabled())
ÂÂÂÂÂÂÂÂÂ return radix__pgd_alloc(mm);

-ÂÂÂ pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ÂÂÂ pgd = kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
ÂÂÂÂÂ /*
ÂÂÂÂÂÂ * Don't scan the PGD for pointers, it contains references to PUDs but
@@ -120,7 +120,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
ÂÂÂÂÂ pud_t *pud;

-ÂÂÂ pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
+ÂÂÂ pud = kmem_cache_zalloc(PGT_CACHE(PUD_CACHE_INDEX),
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
ÂÂÂÂÂ /*
ÂÂÂÂÂÂ * Tell kmemleak to ignore the PUD, that means don't scan it for
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..766cf0c90d19 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -32,7 +32,7 @@ extern struct kmem_cache *pgtable_cache[];

 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-ÂÂÂ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ÂÂÂ return kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
ÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
 }

diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e2d62d033708..54ee5ac02d81 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -43,7 +43,7 @@ extern struct kmem_cache *pgtable_cache[];

 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-ÂÂÂ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ÂÂÂ return kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
ÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
 }

@@ -56,7 +56,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)

 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-ÂÂÂ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ÂÂÂ return kmem_cache_zalloc(PGT_CACHE(PUD_INDEX_SIZE),
ÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
 }

@@ -86,7 +86,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,

 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-ÂÂÂ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ÂÂÂ return kmem_cache_zalloc(PGT_CACHE(PMD_CACHE_INDEX),
ÂÂÂÂÂÂÂÂÂÂÂÂÂ pgtable_gfp_flags(mm, GFP_KERNEL));
 }

diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 2b656e67f2ea..2ae15ff8f76f 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -25,21 +25,6 @@
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>

-static void pgd_ctor(void *addr)
-{
-ÂÂÂ memset(addr, 0, PGD_TABLE_SIZE);
-}
-
-static void pud_ctor(void *addr)
-{
-ÂÂÂ memset(addr, 0, PUD_TABLE_SIZE);
-}
-
-static void pmd_ctor(void *addr)
-{
-ÂÂÂ memset(addr, 0, PMD_TABLE_SIZE);
-}
-
 struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
 EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */

@@ -91,15 +76,15 @@ EXPORT_SYMBOL_GPL(pgtable_cache_add);ÂÂÂ /* used by kvm_hv module */

 void pgtable_cache_init(void)
 {
-ÂÂÂ pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+ÂÂÂ pgtable_cache_add(PGD_INDEX_SIZE, NULL);

ÂÂÂÂÂ if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX))
-ÂÂÂÂÂÂÂ pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+ÂÂÂÂÂÂÂ pgtable_cache_add(PMD_CACHE_INDEX, NULL);
ÂÂÂÂÂ /*
ÂÂÂÂÂÂ * In all current configs, when the PUD index exists it's the
ÂÂÂÂÂÂ * same size as either the pgd or pmd index except with THP enabled
ÂÂÂÂÂÂ * on book3s 64
ÂÂÂÂÂÂ */
ÂÂÂÂÂ if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
-ÂÂÂÂÂÂÂ pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
+ÂÂÂÂÂÂÂ pgtable_cache_add(PUD_CACHE_INDEX, NULL);
 }