Re: [PATCH v5 1/5] kasan: support backing vmalloc space with real shadow memory

From: Daniel Axtens
Date: Fri Aug 30 2019 - 10:14:39 EST


Hi all,

> +static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
> +					void *unused)
> +{
> +	unsigned long page;
> +
> +	page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);
> +
> +	spin_lock(&init_mm.page_table_lock);
> +
> +	/*
> +	 * we want to catch bugs where we end up clearing a pte that wasn't
> +	 * set. This will unfortunately also fire if we are releasing a region
> +	 * where we had a failure allocating the shadow region.
> +	 */
> +	WARN_ON_ONCE(pte_none(*ptep));
> +
> +	pte_clear(&init_mm, addr, ptep);
> +	free_page(page);
> +	spin_unlock(&init_mm.page_table_lock);

It's just occurred to me that the free_page() call really needs to be
guarded by an 'if (likely(!pte_none(*ptep))) {' - if the pte was never
set (for example because allocating the shadow failed part way through),
there's no page to free.
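
Something like this is what I have in mind for v6 - an untested sketch,
with the over-eager WARN_ON_ONCE() replaced by the guard:

static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
					void *unused)
{
	unsigned long page;

	page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);

	spin_lock(&init_mm.page_table_lock);

	/*
	 * The pte may already be clear, e.g. if we are releasing a
	 * region where allocating the shadow failed part way through.
	 * Only clear the pte and free the backing page if the pte was
	 * actually set.
	 */
	if (likely(!pte_none(*ptep))) {
		pte_clear(&init_mm, addr, ptep);
		free_page(page);
	}
	spin_unlock(&init_mm.page_table_lock);

	return 0;
}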

I'll spin v6 on Monday.

Regards,
Daniel

> +
> +	return 0;
> +}
> +
> +/*
> + * Release the backing for the vmalloc region [start, end), which
> + * lies within the free region [free_region_start, free_region_end).
> + *
> + * This can be run lazily, long after the region was freed. It runs
> + * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
> + * infrastructure.
> + */
> +void kasan_release_vmalloc(unsigned long start, unsigned long end,
> +			   unsigned long free_region_start,
> +			   unsigned long free_region_end)
> +{
> +	void *shadow_start, *shadow_end;
> +	unsigned long region_start, region_end;
> +
> +	/* we start with shadow entirely covered by this region */
> +	region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
> +	region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
> +
> +	/*
> +	 * We don't want to extend the region we release to the entire free
> +	 * region, as the free region might cover huge chunks of vmalloc space
> +	 * where we never allocated anything. We just want to see if we can
> +	 * extend the [start, end) range: if start or end fall part way through
> +	 * a shadow page, we want to check if we can free that entire page.
> +	 */
> +
> +	free_region_start = ALIGN(free_region_start,
> +				  PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
> +
> +	if (start != region_start &&
> +	    free_region_start < region_start)
> +		region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
> +
> +	free_region_end = ALIGN_DOWN(free_region_end,
> +				     PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
> +
> +	if (end != region_end &&
> +	    free_region_end > region_end)
> +		region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
> +
> +	shadow_start = kasan_mem_to_shadow((void *)region_start);
> +	shadow_end = kasan_mem_to_shadow((void *)region_end);
> +
> +	if (shadow_end > shadow_start)
> +		apply_to_page_range(&init_mm, (unsigned long)shadow_start,
> +				    (unsigned long)(shadow_end - shadow_start),
> +				    kasan_depopulate_vmalloc_pte, NULL);
> +}
> +#endif
> diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c
> index 36c645939bc9..2d97efd4954f 100644
> --- a/mm/kasan/generic_report.c
> +++ b/mm/kasan/generic_report.c
> @@ -86,6 +86,9 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info)
>  	case KASAN_ALLOCA_RIGHT:
>  		bug_type = "alloca-out-of-bounds";
>  		break;
> +	case KASAN_VMALLOC_INVALID:
> +		bug_type = "vmalloc-out-of-bounds";
> +		break;
>  	}
>
>  	return bug_type;
> diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> index 35cff6bbb716..3a083274628e 100644
> --- a/mm/kasan/kasan.h
> +++ b/mm/kasan/kasan.h
> @@ -25,6 +25,7 @@
>  #endif
>
>  #define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */
> +#define KASAN_VMALLOC_INVALID 0xF9 /* unallocated space in vmapped page */
>
>  /*
>   * Stack redzone shadow values
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index b8101030f79e..bf806566cad0 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -690,8 +690,19 @@ merge_or_add_vmap_area(struct vmap_area *va,
>  	struct list_head *next;
>  	struct rb_node **link;
>  	struct rb_node *parent;
> +	unsigned long orig_start, orig_end;
>  	bool merged = false;
>
> +	/*
> +	 * To manage KASAN vmalloc memory usage, we use this opportunity to
> +	 * clean up the shadow memory allocated to back this allocation.
> +	 * Because a vmalloc shadow page covers several pages, the start or end
> +	 * of an allocation might not align with a shadow page. Use the merging
> +	 * opportunities to try to extend the region we can release.
> +	 */
> +	orig_start = va->va_start;
> +	orig_end = va->va_end;
> +
>  	/*
>  	 * Find a place in the tree where VA potentially will be
>  	 * inserted, unless it is merged with its sibling/siblings.
> @@ -741,6 +752,10 @@ merge_or_add_vmap_area(struct vmap_area *va,
>  		if (sibling->va_end == va->va_start) {
>  			sibling->va_end = va->va_end;
>
> +			kasan_release_vmalloc(orig_start, orig_end,
> +					      sibling->va_start,
> +					      sibling->va_end);
> +
>  			/* Check and update the tree if needed. */
>  			augment_tree_propagate_from(sibling);
>
> @@ -754,6 +769,8 @@ merge_or_add_vmap_area(struct vmap_area *va,
>  	}
>
>  insert:
> +	kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end);
> +
>  	if (!merged) {
>  		link_va(va, root, parent, link, head);
>  		augment_tree_propagate_from(va);
> @@ -2068,6 +2085,22 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
>
>  	setup_vmalloc_vm(area, va, flags, caller);
>
> +	/*
> +	 * For KASAN, if we are in vmalloc space, we need to cover the shadow
> +	 * area with real memory. If we come here through VM_ALLOC, this is
> +	 * done by a higher level function that has access to the true size,
> +	 * which might not be a full page.
> +	 *
> +	 * We assume module space comes via VM_ALLOC path.
> +	 */
> +	if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) {
> +		if (kasan_populate_vmalloc(area->size, area)) {
> +			unmap_vmap_area(va);
> +			kfree(area);
> +			return NULL;
> +		}
> +	}
> +
>  	return area;
>  }
>
> @@ -2245,6 +2278,9 @@ static void __vunmap(const void *addr, int deallocate_pages)
>  	debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
>  	debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
>
> +	if (area->flags & VM_KASAN)
> +		kasan_poison_vmalloc(area->addr, area->size);
> +
>  	vm_remove_mappings(area, deallocate_pages);
>
>  	if (deallocate_pages) {
> @@ -2495,6 +2531,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>  	if (!addr)
>  		return NULL;
>
> +	if (kasan_populate_vmalloc(real_size, area))
> +		return NULL;
> +
>  	/*
>  	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
>  	 * flag. It means that vm_struct is not fully initialized.
> @@ -3349,10 +3388,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>  	spin_unlock(&vmap_area_lock);
>
>  	/* insert all vm's */
> -	for (area = 0; area < nr_vms; area++)
> +	for (area = 0; area < nr_vms; area++) {
>  		setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
>  				 pcpu_get_vm_areas);
>
> +		/* assume success here */
> +		kasan_populate_vmalloc(sizes[area], vms[area]);
> +	}
> +
>  	kfree(vas);
>  	return vms;
>
> --
> 2.20.1