[RFC PATCH v2 1/1] mm/vmalloc: Introduce vmap_file()
From: Vishal Moola (Oracle)
Date: Fri Mar 28 2025 - 17:15:52 EST
vmap_file() is effectively an in-kernel equivalent to calling mmap()
on a file. A caller passes in a file's address_space, and vmap_file()
maps the requested portion of that file directly into kernel virtual
space.
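
For example, a caller that already has the pagecache folios for the
range resident and uptodate could map bytes [pos, pos + len - 1] of a
file roughly like this (hypothetical caller code; file, pos, len and
use_data() are made-up names for illustration):

	void *kaddr = vmap_file(file->f_mapping, pos, pos + len - 1,
				VM_MAP, PAGE_KERNEL);

	if (!kaddr)
		return -ENOMEM;
	/* The data at pos begins at kaddr + offset_in_page(pos). */
	use_data(kaddr + offset_in_page(pos), len);
	vunmap(kaddr);
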
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@xxxxxxxxx>
---
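Note for reviewers (not part of the change): the preparation the
kerneldoc asks of callers might look roughly like the sketch below,
using read_mapping_folio() to bring each folio in uptodate. Here
mapping, file, start, end and the drop_folios label stand in for the
caller's context, and the folio references taken would pin the range
until after vunmap():

	pgoff_t index = start >> PAGE_SHIFT;

	while (index <= (end >> PAGE_SHIFT)) {
		struct folio *folio = read_mapping_folio(mapping, index,
							 file);

		if (IS_ERR(folio))
			goto drop_folios;	/* put the refs taken so far */
		index = folio_next_index(folio);
	}
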
 include/linux/vmalloc.h |   2 +
 mm/vmalloc.c            | 113 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..d5420985865f 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -192,6 +192,8 @@ extern void vfree_atomic(const void *addr);
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
+void *vmap_file(struct address_space *mapping, loff_t start, loff_t end,
+		unsigned long flags, pgprot_t prot);
 void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
 extern void vunmap(const void *addr);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3ed720a787ec..b94489032ab5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3475,6 +3475,119 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
+/**
+ * vmap_file - map a range of a file into virtually contiguous space.
+ * @mapping: The address_space backing the file.
+ * @start: The first byte to map.
+ * @end: The last byte to map.
+ * @flags: vm_area->flags.
+ * @prot: Page protection for the mapping.
+ *
+ * Maps part of a file into contiguous kernel virtual space. The caller
+ * must ensure that the folios caching the file range are present and
+ * uptodate, and that they remain so until the range is unmapped.
+ *
+ * If @start or @end are not PAGE_ALIGNED, vmap_file() will round
+ * @start down and @end up to encompass the desired pages. The
+ * address returned is always PAGE_ALIGNED.
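+ *
+ * For example, with 4096-byte pages, @start = 5000 and @end = 9000 map
+ * file pages 1 and 2; the returned address corresponds to file byte
+ * 4096, so the data at byte 5000 sits at offset 904 from it.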
+ *
+ * Return: the address of the area or %NULL on failure.
+ */
+void *vmap_file(struct address_space *mapping, loff_t start, loff_t end,
+		unsigned long flags, pgprot_t prot)
+{
+	struct vm_struct *area;
+	struct folio *folio;
+	unsigned long addr, end_addr;
+	const pgoff_t first = start >> PAGE_SHIFT;
+	const pgoff_t last = end >> PAGE_SHIFT;
+	const unsigned long size = (last - first + 1) << PAGE_SHIFT;
+	XA_STATE(xas, &mapping->i_pages, first);
+
+	if (WARN_ON_ONCE(flags & VM_FLUSH_RESET_PERMS))
+		return NULL;
+
+	/*
+	 * Your top guard is someone else's bottom guard. Not having a top
+	 * guard compromises someone else's mappings too.
+	 */
+	if (WARN_ON_ONCE(flags & VM_NO_GUARD))
+		flags &= ~VM_NO_GUARD;
+
+	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
+	if (!area)
+		return NULL;
+
+	addr = (unsigned long)area->addr;
+	end_addr = addr + size;
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, last) {
+		phys_addr_t map_start;
+		unsigned long map_size;
+		bool pmd_bound, is_first_map;
+		int err;
+
+		if (xas_retry(&xas, folio))
+			continue;
+		/*
+		 * A shadow entry or a !uptodate folio means the caller
+		 * has not kept the range resident as promised, so fail
+		 * the whole mapping rather than return a partial one.
+		 */
+		if (xa_is_value(folio) || !folio_test_uptodate(folio))
+			goto out_unmap;
+
+		is_first_map = (addr == (unsigned long)area->addr);
+		/* PFN_PHYS() avoids overflowing the shift on 32-bit. */
+		map_start = PFN_PHYS(folio_pfn(folio));
+		map_size = folio_size(folio);
+
+		/*
+		 * We can unconditionally calculate values for the first
+		 * folio. This lets us handle skipping pages in the first
+		 * folio without verifying addresses every iteration.
+		 */
+		if (is_first_map) {
+			map_size -= (first - folio->index) << PAGE_SHIFT;
+			map_start += (first - folio->index) << PAGE_SHIFT;
+		}
+
+		if (addr + map_size > end_addr)
+			map_size = end_addr - addr;
+
+		/*
+		 * If this mapping starts or crosses into a new PMD, drop
+		 * the RCU read lock around the mapping call so that a new
+		 * page table can be allocated (which may sleep).
+		 */
+		pmd_bound = is_first_map ||
+			    IS_ALIGNED(addr, PMD_SIZE) ||
+			    ((addr & PMD_MASK) !=
+			     ((addr + map_size) & PMD_MASK));
+
+		if (pmd_bound) {
+			xas_pause(&xas);
+			rcu_read_unlock();
+		}
+
+		err = vmap_range_noflush(addr, addr + map_size,
+					 map_start, prot, PAGE_SHIFT);
+
+		if (pmd_bound)
+			rcu_read_lock();
+
+		if (err)
+			goto out_unmap;
+
+		addr += map_size;
+	}
+
+	rcu_read_unlock();
+
+	/* The caller promised folios covering the whole range. */
+	if (WARN_ON_ONCE(addr != end_addr)) {
+		vunmap(area->addr);
+		return NULL;
+	}
+
+	flush_cache_vmap((unsigned long)area->addr, end_addr);
+	return area->addr;
+
+out_unmap:
+	rcu_read_unlock();
+	vunmap(area->addr);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(vmap_file);
+
 #ifdef CONFIG_VMAP_PFN
 struct vmap_pfn_data {
 	unsigned long *pfns;
--
2.48.1