Re: [PATCH v2 1/1] riscv/kasan: add KASAN_VMALLOC support

From: Alex Ghiti
Date: Wed Feb 24 2021 - 14:13:29 EST


Hi Nylon,

Le 2/22/21 à 12:13 PM, Alex Ghiti a écrit :
Le 2/21/21 à 8:37 PM, Nylon Chen a écrit :
Hi Alex, Palmer

Sorry I missed this message.
On Sun, Feb 21, 2021 at 09:38:04PM +0800, Alex Ghiti wrote:
Le 2/13/21 à 5:52 AM, Alex Ghiti a écrit :
Hi Nylon, Palmer,

Le 2/8/21 à 1:28 AM, Alex Ghiti a écrit :
Hi Nylon,

Le 1/22/21 à 10:56 PM, Palmer Dabbelt a écrit :
On Fri, 15 Jan 2021 21:58:35 PST (-0800), nylon7@xxxxxxxxxxxxx wrote:
This is modeled on the x86/s390 implementations,
so it doesn't map the early shadow page to cover the VMALLOC space.

Prepopulate top level page table for the range that would otherwise be
empty.

Lower levels are filled dynamically upon memory allocation while
booting.

I think we can improve the changelog a bit here with something like that:

"KASAN vmalloc space used to be mapped using kasan early shadow page.
KASAN_VMALLOC requires the top-level of the kernel page table to be
properly populated, lower levels being filled dynamically upon memory
allocation at runtime."


Signed-off-by: Nylon Chen <nylon7@xxxxxxxxxxxxx>
Signed-off-by: Nick Hu <nickhu@xxxxxxxxxxxxx>
---
  arch/riscv/Kconfig         |  1 +
  arch/riscv/mm/kasan_init.c | 57 +++++++++++++++++++++++++++++++++++++-
  2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 81b76d44725d..15a2c8088bbe 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -57,6 +57,7 @@ config RISCV
      select HAVE_ARCH_JUMP_LABEL
      select HAVE_ARCH_JUMP_LABEL_RELATIVE
      select HAVE_ARCH_KASAN if MMU && 64BIT
+    select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
      select HAVE_ARCH_KGDB
      select HAVE_ARCH_KGDB_QXFER_PKT
      select HAVE_ARCH_MMAP_RND_BITS if MMU
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 12ddd1f6bf70..4b9149f963d3 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -9,6 +9,19 @@
  #include <linux/pgtable.h>
  #include <asm/tlbflush.h>
  #include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+static __init void *early_alloc(size_t size, int node)
+{
+    void *ptr = memblock_alloc_try_nid(size, size,
+        __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+
+    if (!ptr)
+        panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d
from=%llx\n",
+            __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
+
+    return ptr;
+}

  extern pgd_t early_pg_dir[PTRS_PER_PGD];
  asmlinkage void __init kasan_early_init(void)
@@ -83,6 +96,40 @@ static void __init populate(void *start, void *end)
      memset(start, 0, end - start);
  }

+void __init kasan_shallow_populate(void *start, void *end)
+{
+    unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+    unsigned long vend = PAGE_ALIGN((unsigned long)end);
+    unsigned long pfn;
+    int index;
+    void *p;
+    pud_t *pud_dir, *pud_k;
+    pgd_t *pgd_dir, *pgd_k;
+    p4d_t *p4d_dir, *p4d_k;
+
+    while (vaddr < vend) {
+        index = pgd_index(vaddr);
+        pfn = csr_read(CSR_SATP) & SATP_PPN;

At this point in the boot process, we know that we use swapper_pg_dir
so no need to read SATP.

+        pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;

Here, this pgd_dir assignment is overwritten 2 lines below, so no need
for it.

+        pgd_k = init_mm.pgd + index;
+        pgd_dir = pgd_offset_k(vaddr);

pgd_offset_k(vaddr) = init_mm.pgd + pgd_index(vaddr) so pgd_k == pgd_dir.

+        set_pgd(pgd_dir, *pgd_k);
+
+        p4d_dir = p4d_offset(pgd_dir, vaddr);
+        p4d_k  = p4d_offset(pgd_k, vaddr);
+
+        vaddr = (vaddr + PUD_SIZE) & PUD_MASK;

Why do you increase vaddr *before* populating the first one ? And
pud_addr_end does that properly: it returns the next pud address if it
does not go beyond end address to map.

+        pud_dir = pud_offset(p4d_dir, vaddr);
+        pud_k = pud_offset(p4d_k, vaddr);
+
+        if (pud_present(*pud_dir)) {
+            p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+            pud_populate(&init_mm, pud_dir, p);

init_mm is not needed here.

+        }
+        vaddr += PAGE_SIZE;

Why do you need to add PAGE_SIZE ? vaddr already points to the next pud.

It seems like this patch tries to populate the userspace page table,
whereas at this point in the boot process only swapper_pg_dir is used.
Or am I missing something?

Thanks,

Alex

This morning I implemented a version that addresses all the comments I made
earlier. I was able to insert test_kasan_module on both sv39 and sv48
without any modification: set_pgd "goes through" all the unused page
table levels, whereas the p*d_populate helpers are no-ops for unused levels.

If you have any comment, do not hesitate.

diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index adbf94b7e68a..d643b222167c 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -195,6 +195,31 @@ static void __init kasan_populate(void *start, void
*end)
          memset(start, KASAN_SHADOW_INIT, end - start);
   }


+void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned
long end)
+{
+       unsigned long next;
+       void *p;
+       pgd_t *pgd_k = pgd_offset_k(vaddr);
+
+       do {
+               next = pgd_addr_end(vaddr, end);
+               if (pgd_page_vaddr(*pgd_k) == (unsigned
long)lm_alias(kasan_early_shadow_pgd_next)) {
+                       p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+                       set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)),
PAGE_TABLE));
+               }
+       } while (pgd_k++, vaddr = next, vaddr != end);
+}
+

This way of going through the page table seems to be largely used across
the kernel (cf KASAN population functions of arm64/x86) so I do think
this patch brings value to Nylon and Nick's patch.

I can propose a real patch if you agree, and I'll add a Co-developed-by tag
for Nylon/Nick since this only 'improves' theirs.

Thanks,

Alex

I agree with your proposal, but when I tried your patch it didn't work
because `kasan_early_shadow_pgd_next` wasn't defined.

Oops, I messed up my rebase, please replace 'kasan_early_shadow_pgd_next' with 'kasan_early_shadow_pmd'.

Thank you for your feedback,

Alex


Did you have time to test the above fix? It would be nice to replace your current patch with the above solution before it gets merged for 5.12. I will propose something tomorrow; feel free to review and test :)

Thanks again,

Alex


Do you have the complete patch, or did I just miss some content?
+void __init kasan_shallow_populate(void *start, void *end)
+{
+       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+       unsigned long vend = PAGE_ALIGN((unsigned long)end);
+
+       kasan_shallow_populate_pgd(vaddr, vend);
+
+       local_flush_tlb_all();
+}
+
   void __init kasan_init(void)
   {
          phys_addr_t _start, _end;
@@ -206,7 +231,15 @@ void __init kasan_init(void)
           */
          kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
                                      (void *)kasan_mem_to_shadow((void *)
- VMALLOC_END));
+ VMEMMAP_END));
+       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+               kasan_shallow_populate(
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+       else
+               kasan_populate_early_shadow(
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));


          /* Populate the linear mapping */
          for_each_mem_range(i, &_start, &_end) {

_______________________________________________
linux-riscv mailing list
linux-riscv@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/linux-riscv