Re: [PATCH 1/1] mm/vmalloc: Add preempt point in purge_vmap_node() when enabling kasan
From: Adrian Huang
Date: Wed Jul 24 2024 - 08:46:40 EST
> It works great and does not generate the soft-lock-up splat :)
> See some comments below:
Great. Thanks for the confirmation.
<snip>
>> + kasan_release_vmalloc(start, end, start, end, KASAN_VMALLOC_TLB_FLUSH);
>> +
>>
> Do we need it here? We just did the TLB flush for an entire range in
> __purge_vmap_area_lazy(). So, it is invoked twice, which looks odd to me.
>
> Am I missing something?
1. The TLB flush for the entire range in __purge_vmap_area_lazy() covers
the vmalloc virtual address range (VMALLOC_START -> VMALLOC_END).
2. The TLB flush in purge_vmap_node() covers the corresponding KASAN shadow
virtual address range (the shadow offset 'CONFIG_KASAN_SHADOW_OFFSET' is
defined in .config); the shadow address arithmetic is sketched below.
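A minimal stand-alone sketch of that mapping (the arithmetic mirrors
kasan_mem_to_shadow() in include/linux/kasan.h; the scale shift and shadow
offset below are the generic-KASAN/x86_64 defaults and are used here only
for illustration). It shows why the two flushes target disjoint virtual
ranges:

#include <stdio.h>

/* Illustrative values: generic KASAN scale shift and the x86_64 default
 * CONFIG_KASAN_SHADOW_OFFSET; other architectures/configs differ.
 */
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_OFFSET      0xdffffc0000000000UL

/* Same arithmetic as kasan_mem_to_shadow(). */
static unsigned long mem_to_shadow(unsigned long addr)
{
        return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
        unsigned long va = 0xffffc90000000000UL; /* e.g. VMALLOC_START on x86_64 */

        printf("vmalloc addr %#lx -> shadow addr %#lx\n",
               va, mem_to_shadow(va));
        return 0;
}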
BTW, I found that my first patch has a potential risk: we need to flush
the TLB of the KASAN shadow virtual address range first. Please see the
following patch for details (I put a comment explaining this in the patch).
The patch also works well on my 256-core machine.
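To make the intent of the two flags clearer, here is a rough stand-alone
sketch (user space, with printf() standing in for the real work; the helper
name mirrors the patch but is purely illustrative) of the ordering
purge_vmap_node() now follows: one shadow TLB flush for the whole purge
range up front, then per-vmap_area shadow page-table teardown:

#include <stdio.h>

#define KASAN_VMALLOC_PAGE_RANGE 0x1 /* depopulate existing shadow page range */
#define KASAN_VMALLOC_TLB_FLUSH  0x2 /* flush the shadow TLB */

/* Purely illustrative stand-in for kasan_release_vmalloc(). */
static void release_vmalloc(unsigned long start, unsigned long end,
                            unsigned long flags)
{
        if (flags & KASAN_VMALLOC_PAGE_RANGE)
                printf("depopulate shadow page tables for [%#lx, %#lx)\n",
                       start, end);
        if (flags & KASAN_VMALLOC_TLB_FLUSH)
                printf("flush shadow TLB for [%#lx, %#lx)\n", start, end);
}

int main(void)
{
        /* Step 1: flush the shadow TLB for the whole purge range before any
         * vmap_area can be returned to the pool and re-used via node_alloc()
         * on another core.
         */
        release_vmalloc(0x1000, 0x9000, KASAN_VMALLOC_TLB_FLUSH);

        /* Step 2: tear down the shadow page tables per vmap_area. */
        release_vmalloc(0x1000, 0x3000, KASAN_VMALLOC_PAGE_RANGE);
        release_vmalloc(0x5000, 0x9000, KASAN_VMALLOC_PAGE_RANGE);
        return 0;
}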
If you're OK with the patch, I'll submit it for upstream review. May I
also have your Tested-by/Reviewed-by tag(s)? (If possible, could you please
test the following patch?)
Thanks.
---
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 70d6a8f6e25d..ddbf42a1a7b7 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -55,6 +55,9 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
int kasan_populate_early_shadow(const void *shadow_start,
const void *shadow_end);
+#define KASAN_VMALLOC_PAGE_RANGE 0x1 /* Apply existing page range */
+#define KASAN_VMALLOC_TLB_FLUSH 0x2 /* TLB flush */
+
#ifndef kasan_mem_to_shadow
static inline void *kasan_mem_to_shadow(const void *addr)
{
@@ -511,7 +514,8 @@ void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long free_region_start,
- unsigned long free_region_end);
+ unsigned long free_region_end,
+ unsigned long flags);
#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
@@ -526,7 +530,8 @@ static inline int kasan_populate_vmalloc(unsigned long start,
static inline void kasan_release_vmalloc(unsigned long start,
unsigned long end,
unsigned long free_region_start,
- unsigned long free_region_end) { }
+ unsigned long free_region_end,
+ unsigned long flags) { }
#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
@@ -561,7 +566,8 @@ static inline int kasan_populate_vmalloc(unsigned long start,
static inline void kasan_release_vmalloc(unsigned long start,
unsigned long end,
unsigned long free_region_start,
- unsigned long free_region_end) { }
+ unsigned long free_region_end,
+ unsigned long flags) { }
static inline void *kasan_unpoison_vmalloc(const void *start,
unsigned long size,
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index d6210ca48dda..88d1c9dcb507 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -489,7 +489,8 @@ static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
*/
void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long free_region_start,
- unsigned long free_region_end)
+ unsigned long free_region_end,
+ unsigned long flags)
{
void *shadow_start, *shadow_end;
unsigned long region_start, region_end;
@@ -522,12 +523,17 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
__memset(shadow_start, KASAN_SHADOW_INIT, shadow_end - shadow_start);
return;
}
- apply_to_existing_page_range(&init_mm,
+
+
+ if (flags & KASAN_VMALLOC_PAGE_RANGE)
+ apply_to_existing_page_range(&init_mm,
(unsigned long)shadow_start,
size, kasan_depopulate_vmalloc_pte,
NULL);
- flush_tlb_kernel_range((unsigned long)shadow_start,
- (unsigned long)shadow_end);
+
+ if (flags & KASAN_VMALLOC_TLB_FLUSH)
+ flush_tlb_kernel_range((unsigned long)shadow_start,
+ (unsigned long)shadow_end);
}
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e34ea860153f..12cdc92cdb83 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2193,8 +2193,22 @@ static void purge_vmap_node(struct work_struct *work)
struct vmap_area *va, *n_va;
LIST_HEAD(local_list);
+ unsigned long start;
+ unsigned long end;
+
vn->nr_purged = 0;
+ start = list_first_entry(&vn->purge_list, struct vmap_area, list)->va_start;
+
+ end = list_last_entry(&vn->purge_list, struct vmap_area, list)->va_end;
+
+ /*
+ * Since node_pool_add_va() returns vmap_area(s) to its pool, the
+ * returned vmap_area(s) might be grabbed immediately via node_alloc()
+ * by another core. We need to flush the TLB first.
+ */
+ kasan_release_vmalloc(start, end, start, end, KASAN_VMALLOC_TLB_FLUSH);
+
list_for_each_entry_safe(va, n_va, &vn->purge_list, list) {
unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
unsigned long orig_start = va->va_start;
@@ -2205,7 +2219,8 @@ static void purge_vmap_node(struct work_struct *work)
if (is_vmalloc_or_module_addr((void *)orig_start))
kasan_release_vmalloc(orig_start, orig_end,
- va->va_start, va->va_end);
+ va->va_start, va->va_end,
+ KASAN_VMALLOC_PAGE_RANGE);
atomic_long_sub(nr, &vmap_lazy_nr);
vn->nr_purged++;
@@ -4726,7 +4741,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
&free_vmap_area_list);
if (va)
kasan_release_vmalloc(orig_start, orig_end,
- va->va_start, va->va_end);
+ va->va_start, va->va_end,
+ KASAN_VMALLOC_PAGE_RANGE | KASAN_VMALLOC_TLB_FLUSH);
vas[area] = NULL;
}
@@ -4776,7 +4792,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
&free_vmap_area_list);
if (va)
kasan_release_vmalloc(orig_start, orig_end,
- va->va_start, va->va_end);
+ va->va_start, va->va_end,
+ KASAN_VMALLOC_PAGE_RANGE | KASAN_VMALLOC_TLB_FLUSH);
vas[area] = NULL;
kfree(vms[area]);
}