Re: [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc

From: Uladzislau Rezki
Date: Thu Apr 18 2024 - 04:55:30 EST


On Tue, Apr 02, 2024 at 03:15:01PM -0500, Maxwell Bland wrote:
> Makes red-black tree allocation more flexible on a per-architecture
> basis by introducing optional hooks to refine the red-black tree
> structuring and exposing vmalloc functions for clipping vmap areas,
> finding vmap areas, and inserting vmap areas.
>
> With this patch, the red-black vmap tree can be refined to account for
> architecture-specific memory management operations, most notably address
> space layout randomization, as these features conflict with generic
> management of a single vmalloc_start to vmalloc_end range as given by
> mm/vmalloc.c.
>
> For example, x86 is forced to restrict aslr to 1024 possible locations,
> which is a very, very small number, and arm64 breaks standard code/data
> partitioning altogether, which prevents the enforcement of performant
> immutability on kernel page tables.
>
> Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
> ---
> include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
> mm/vmalloc.c | 16 ++++++++++------
> 2 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 98ea90e90439..3c5ce7ee0bea 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -12,6 +12,7 @@
>
> #include <asm/vmalloc.h>
>
> +struct kmem_cache;
> struct vm_area_struct; /* vma defining user mapping in mm_types.h */
> struct notifier_block; /* in notifier.h */
> struct iov_iter; /* in uio.h */
> @@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
> }
> #endif
>
> +#ifndef arch_skip_va
> +static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
> +{
> + return false;
> +}
> +#endif
> +
> +#ifndef arch_refine_vmap_space
> +static inline void arch_refine_vmap_space(struct rb_root *root,
> + struct list_head *head,
> + struct kmem_cache *cachep)
> +{
> +}
> +#endif
> +
> /*
> * Highlevel APIs for driver use
> */
> @@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
> void free_vm_area(struct vm_struct *area);
> extern struct vm_struct *remove_vm_area(const void *addr);
> extern struct vm_struct *find_vm_area(const void *addr);
> +extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
> + struct rb_root *root,
> + struct list_head *head);
> +extern int va_clip(struct rb_root *root, struct list_head *head,
> + struct vmap_area *va, unsigned long nva_start_addr,
> + unsigned long size);
> +extern struct vmap_area *__find_vmap_area(unsigned long addr,
> + struct rb_root *root);
>
To me it looks like you want to make internal functions public for
everyone, which is not good, imho.

> struct vmap_area *find_vmap_area(unsigned long addr);
>
> static inline bool is_vm_area_hugepages(const void *addr)
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 68fa001648cc..de4577a3708e 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
> return atomic_long_read(&nr_vmalloc_pages);
> }
>
> -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
> +struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
> {
> struct rb_node *n = root->rb_node;
>
> @@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
> link_va(va, root, parent, link, head);
> }
>
> -static void
> +void
> insert_vmap_area_augment(struct vmap_area *va,
> struct rb_node *from, struct rb_root *root,
> struct list_head *head)
> @@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
> vstart < va->va_start) {
> node = node->rb_left;
> } else {
> - if (is_within_this_va(va, size, align, vstart))
> + if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
> return va;
>
> /*
> @@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
> */
> while ((node = rb_parent(node))) {
> va = rb_entry(node, struct vmap_area, rb_node);
> - if (is_within_this_va(va, size, align, vstart))
> + if (!arch_skip_va(va, vstart) &&
> + is_within_this_va(va, size, align, vstart))
> return va;
>
> if (get_subtree_max_size(node->rb_right) >= length &&
> @@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
> struct vmap_area *va;
>
> list_for_each_entry(va, head, list) {
> - if (!is_within_this_va(va, size, align, vstart))
> + if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
> continue;
>
Injecting arch_skip_va() into the search algorithm sounds like a hack
and might lead (if I am not missing something; I need to check more closely)
to allocation failures when the search goes toward a reserved VA that we
are not allowed to allocate from.

> return va;
> @@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
> return type;
> }
>
> -static __always_inline int
> +__always_inline int
> va_clip(struct rb_root *root, struct list_head *head,
> struct vmap_area *va, unsigned long nva_start_addr,
> unsigned long size)
> @@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
> vmap_node_shrinker->count_objects = vmap_node_shrink_count;
> vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
> shrinker_register(vmap_node_shrinker);
> +
> + arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
> + vmap_area_cachep);
> }
>
Why don't you just allocate using a specific range, from MODULES_ASLR_START
to VMALLOC_END?

Thanks!

--
Uladzislau Rezki