Re: [PATCH v7 1/4] vmalloc: Add __vmalloc_node_try_addr function
From: Edgecombe, Rick P
Date: Fri Oct 05 2018 - 13:18:49 EST
Forgot to include this:
Reviewed-by: Kees Cook <keescook@xxxxxxxxxxxx>
On Mon, 2018-10-01 at 14:38 -0700, Rick Edgecombe wrote:
> Create __vmalloc_node_try_addr function that tries to allocate at a specific
> address and supports caller specified behavior for whether any lazy purging
> happens if there is a collision.
>
> This new function draws from the __vmalloc_node_range implementation. Attempts
> to merge the two into a single allocator resulted in logic that was difficult
> to follow, so they are left separate.
>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx>
> ---
>  include/linux/vmalloc.h |   3 +
>  mm/vmalloc.c            | 177 +++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 179 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 398e9c9..c7712c8 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -82,6 +82,9 @@ extern void *__vmalloc_node_range(unsigned long size,
> unsigned long align,
> Â unsigned long start, unsigned long end, gfp_t
> gfp_mask,
> Â pgprot_t prot, unsigned long vm_flags, int node,
> Â const void *caller);
> +extern void *__vmalloc_node_try_addr(unsigned long addr, unsigned long size,
> + gfp_t gfp_mask, pgprot_t prot, unsigned long
> vm_flags,
> + int node, int try_purge, const void *caller);
> Â#ifndef CONFIG_MMU
> Âextern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
> Âstatic inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index a728fc4..1954458 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1709,6 +1709,181 @@ static void *__vmalloc_area_node(struct vm_struct
> *area, gfp_t gfp_mask,
> Â return NULL;
> Â}
> Â
> +static bool pvm_find_next_prev(unsigned long end,
> + ÂÂÂÂÂÂÂstruct vmap_area **pnext,
> + ÂÂÂÂÂÂÂstruct vmap_area **pprev);
> +
> +/* Try to allocate a region of KVA of the specified address and size. */
> +static struct vmap_area *try_alloc_vmap_area(unsigned long addr,
> + unsigned long size, int node, gfp_t gfp_mask,
> + int try_purge)
> +{
> + struct vmap_area *va;
> + struct vmap_area *cur_va = NULL;
> + struct vmap_area *first_before = NULL;
> + int need_purge = 0;
> + int blocked = 0;
> + int purged = 0;
> + unsigned long addr_end;
> +
> + WARN_ON(!size);
> + WARN_ON(offset_in_page(size));
> +
> + addr_end = addr + size;
> + if (addr > addr_end)
> + return ERR_PTR(-EOVERFLOW);
> +
> + might_sleep();
> +
> + va = kmalloc_node(sizeof(struct vmap_area),
> + gfp_mask & GFP_RECLAIM_MASK, node);
> + if (unlikely(!va))
> + return ERR_PTR(-ENOMEM);
> +
> + /*
> +  * Only scan the relevant parts containing pointers to other objects
> +  * to avoid false negatives.
> +  */
> + kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask &
> GFP_RECLAIM_MASK);
> +
> +retry:
> + spin_lock(&vmap_area_lock);
> +
> + pvm_find_next_prev(addr, &cur_va, &first_before);
> +
> + if (!cur_va)
> + goto found;
> +
> + /*
> +  * If there is no VA that starts before the target address, start the
> +  * check from the closest VA in order to cover the case where the
> +  * allocation overlaps at the end.
> +  */
> + if (first_before && addr < first_before->va_end)
> + cur_va = first_before;
> +
> + /* Linearly search through to make sure there is a hole */
> + while (cur_va->va_start < addr_end) {
> + if (cur_va->va_end > addr) {
> + if (cur_va->flags & VM_LAZY_FREE) {
> + need_purge = 1;
> + } else {
> + blocked = 1;
> + break;
> + }
> + }
> +
> + if (list_is_last(&cur_va->list, &vmap_area_list))
> + break;
> +
> + cur_va = list_next_entry(cur_va, list);
> + }
> +
> + /*
> +  * If a non-lazy free va blocks the allocation, or
> +  * we are not supposed to purge, but we need to, the
> +  * allocation fails.
> +  */
> + if (blocked || (need_purge && !try_purge))
> + goto fail;
> +
> + if (try_purge && need_purge) {
> + /* if purged once before, give up */
> + if (purged)
> + goto fail;
> +
> + /*
> +  * If the va blocking the allocation is set to
> +  * be purged then purge all vmap_areas that are
> +  * set to be purged since this will flush the TLBs
> +  * anyway.
> +  */
> + spin_unlock(&vmap_area_lock);
> + purge_vmap_area_lazy();
> + need_purge = 0;
> + purged = 1;
> + goto retry;
> + }
> +
> +found:
> + va->va_start = addr;
> + va->va_end = addr_end;
> + va->flags = 0;
> + __insert_vmap_area(va);
> + spin_unlock(&vmap_area_lock);
> +
> + return va;
> +fail:
> + spin_unlock(&vmap_area_lock);
> + kfree(va);
> + if (need_purge && !blocked)
> + return ERR_PTR(-EUCLEAN);
> + return ERR_PTR(-EBUSY);
> +}
> +
> +/**
> + * __vmalloc_node_try_addr  -  try to alloc at a specific address
> + * @addr: address to try
> + * @size: size to try
> + * @gfp_mask: flags for the page level allocator
> + * @prot: protection mask for the allocated pages
> + * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD)
> + * @node: node to use for allocation or NUMA_NO_NODE
> + * @try_purge: try to purge if needed to fulfill an allocation
> + * @caller: caller's return address
> + *
> + * Try to allocate at the specific address. If it succeeds the address is
> + * returned. If it fails an EBUSY ERR_PTR is returned. If try_purge is
> + * zero, it will return an EUCLEAN ERR_PTR if it could have allocated if it
> + * was allowed to purge. It may trigger TLB flushes if a purge is needed,
> + * and try_purge is set.
> + */
> +void *__vmalloc_node_try_addr(unsigned long addr, unsigned long size,
> + gfp_t gfp_mask, pgprot_t prot, unsigned long
> vm_flags,
> + int node, int try_purge, const void *caller)
> +{
> + struct vmap_area *va;
> + struct vm_struct *area;
> + void *alloc_addr;
> + unsigned long real_size = size;
> +
> + size = PAGE_ALIGN(size);
> + if (!size || (size >> PAGE_SHIFT) > totalram_pages)
> + return NULL;
> +
> + WARN_ON(in_interrupt());
> +
> + if (!(vm_flags & VM_NO_GUARD))
> + size += PAGE_SIZE;
> +
> + va = try_alloc_vmap_area(addr, size, node, gfp_mask, try_purge);
> + if (IS_ERR(va))
> + goto fail;
> +
> + area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK,
> node);
> + if (unlikely(!area)) {
> + warn_alloc(gfp_mask, NULL, "kmalloc: allocation failure");
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + setup_vmalloc_vm(area, va, vm_flags, caller);
> +
> + alloc_addr = __vmalloc_area_node(area, gfp_mask, prot, node);
> + if (!alloc_addr) {
> + warn_alloc(gfp_mask, NULL,
> + "vmalloc: allocation failure: %lu bytes", real_size);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + clear_vm_uninitialized_flag(area);
> +
> + kmemleak_vmalloc(area, real_size, gfp_mask);
> +
> + return alloc_addr;
> +fail:
> + return va;
> +}
> +
>  /**
>   * __vmalloc_node_range  -  allocate virtually contiguous memory
>   * @size: allocation size
> @@ -2355,7 +2530,6 @@ void free_vm_area(struct vm_struct *area)
> Â}
> ÂEXPORT_SYMBOL_GPL(free_vm_area);
> Â
> -#ifdef CONFIG_SMP
> Âstatic struct vmap_area *node_to_va(struct rb_node *n)
> Â{
> Â return rb_entry_safe(n, struct vmap_area, rb_node);
> @@ -2403,6 +2577,7 @@ static bool pvm_find_next_prev(unsigned long end,
> Â return true;
> Â}
> Â
> +#ifdef CONFIG_SMP
>  /**
>   * pvm_determine_end - find the highest aligned address between two vmap_areas
>   * @pnext: in/out arg for the next vmap_area