[PATCH 1/5] mm: allow arch refinement/skip for vmap alloc
From: Maxwell Bland
Date: Tue Apr 16 2024 - 13:25:19 EST
Makes red black tree allocation more flexible on a per-architecture
basis by introducing an optional hooks to refine the red-black tree
structuring and exposing vmalloc functions for clipping vmap areas,
finding vmap areas, and inserting vmap areas.
With this patch, the red-black vmap tree can be refined to account for
architecture-specific memory management operations, most notably address
space layout randomization, as these features conflict with generic
management of a single vmalloc_start to vmalloc_end range as given by
mm/vmalloc.c.
For example, x86 is forced to restrict aslr to 1024 possible locations,
which is a very, very small number, and arm64 breaks standard code/data
partitioning altogether, which prevents the enforcement of performant
immmutability on kernel page tables.
Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
---
include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
mm/vmalloc.c | 16 ++++++++++------
2 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 98ea90e90439..3c5ce7ee0bea 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -12,6 +12,7 @@
#include <asm/vmalloc.h>
+struct kmem_cache;
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
struct notifier_block; /* in notifier.h */
struct iov_iter; /* in uio.h */
@@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
}
#endif
+#ifndef arch_skip_va
+static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
+{
+ return false;
+}
+#endif
+
+#ifndef arch_refine_vmap_space
+static inline void arch_refine_vmap_space(struct rb_root *root,
+ struct list_head *head,
+ struct kmem_cache *cachep)
+{
+}
+#endif
+
/*
* Highlevel APIs for driver use
*/
@@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
+extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
+ struct rb_root *root,
+ struct list_head *head);
+extern int va_clip(struct rb_root *root, struct list_head *head,
+ struct vmap_area *va, unsigned long nva_start_addr,
+ unsigned long size);
+extern struct vmap_area *__find_vmap_area(unsigned long addr,
+ struct rb_root *root);
struct vmap_area *find_vmap_area(unsigned long addr);
static inline bool is_vm_area_hugepages(const void *addr)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 68fa001648cc..de4577a3708e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
{
struct rb_node *n = root->rb_node;
@@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
link_va(va, root, parent, link, head);
}
-static void
+void
insert_vmap_area_augment(struct vmap_area *va,
struct rb_node *from, struct rb_root *root,
struct list_head *head)
@@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
vstart < va->va_start) {
node = node->rb_left;
} else {
- if (is_within_this_va(va, size, align, vstart))
+ if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
return va;
/*
@@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
*/
while ((node = rb_parent(node))) {
va = rb_entry(node, struct vmap_area, rb_node);
- if (is_within_this_va(va, size, align, vstart))
+ if (!arch_skip_va(va, vstart) &&
+ is_within_this_va(va, size, align, vstart))
return va;
if (get_subtree_max_size(node->rb_right) >= length &&
@@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
struct vmap_area *va;
list_for_each_entry(va, head, list) {
- if (!is_within_this_va(va, size, align, vstart))
+ if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
continue;
return va;
@@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
return type;
}
-static __always_inline int
+__always_inline int
va_clip(struct rb_root *root, struct list_head *head,
struct vmap_area *va, unsigned long nva_start_addr,
unsigned long size)
@@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
vmap_node_shrinker->count_objects = vmap_node_shrink_count;
vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
shrinker_register(vmap_node_shrinker);
+
+ arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
+ vmap_area_cachep);
}
--
2.39.2