Re: [RFC PATCH] x86/split_lock: Disable SLD if an unaware (out-of-tree) module enables VMX

From: Christoph Hellwig
Date: Mon Apr 06 2020 - 11:24:23 EST


On Mon, Apr 06, 2020 at 04:04:03PM +0200, Peter Zijlstra wrote:
> On Mon, Apr 06, 2020 at 05:50:10AM -0700, Christoph Hellwig wrote:
> > On Fri, Apr 03, 2020 at 09:30:07AM -0700, Sean Christopherson wrote:
> > > Hook into native CR4 writes to disable split-lock detection if CR4.VMXE
> > > is toggled on by an SDL-unaware entity, e.g. an out-of-tree hypervisor
> > > module. Most/all VMX-based hypervisors blindly reflect #AC exceptions
> > > into the guest, or don't intercept #AC in the first place. With SLD
> > > enabled, this results in unexpected #AC faults in the guest, leading to
> > > crashes in the guest and other undesirable behavior.
> >
> > Out of tree modules do not matter, so we should not add code just to
> > work around broken third party code. If you really feel strongly just
> > make sure something they rely on for their hacks stops being exported
> > and they are properly broken.
>
> Something like so then?
>
> I couldn't find any in-tree users for unmap_kernel_range*(),

zsmalloc.c can be built modular. Not that I think it should..

> and this
> removes __get_vm_area() and with the ability to custom ranges. It also
> removes map_vm_area() and replaces it with map_vm_area_nx() which kills
> adding executable maps.

The pbh/electra change looks sensible. ipu3 as said really should use
vmap, with that map_vm_area can also become unexported. In fact
with a vmap variant that allos passing the type of memory for
/proc/vmallocinfo we can just kill off map_vm_area enrirely and fold
it into the two remaining callers.

I'll prepare proper series for the vmalloc area cleanups if you don't
mind.


>
> ---
> diff --git a/arch/powerpc/include/asm/pasemi_dma.h b/arch/powerpc/include/asm/pasemi_dma.h
> index 712a0b32120f..305c54d56244 100644
> --- a/arch/powerpc/include/asm/pasemi_dma.h
> +++ b/arch/powerpc/include/asm/pasemi_dma.h
> @@ -523,4 +523,6 @@ extern void pasemi_dma_free_fun(int fun);
> /* Initialize the library, must be called before any other functions */
> extern int pasemi_dma_init(void);
>
> +extern struct vm_struct *get_phb_area(unsigned long size, unsigned long flags);
> +
> #endif /* ASM_PASEMI_DMA_H */
> diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
> index 270fa3c0d372..0aae02df061d 100644
> --- a/arch/powerpc/platforms/pasemi/dma_lib.c
> +++ b/arch/powerpc/platforms/pasemi/dma_lib.c
> @@ -617,3 +617,10 @@ int pasemi_dma_init(void)
> return err;
> }
> EXPORT_SYMBOL(pasemi_dma_init);
> +
> +struct vm_struct *get_phb_area(unsigned long size, unsigned long flags)
> +{
> + return __get_vm_area_node(size, 1, flags, PHB_IO_BASE, PHB_IO_END, NUMA_NO_NODE,
> + GFP_KERNEL, __builtin_return_address(0));
> +}
> +EXPORT_SYMBOL_GPL(get_phb_area);
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index 65c2ecd730c5..0bd5ea46f1d2 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -274,6 +274,11 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
>
> typedef struct { pgdval_t pgd; } pgd_t;
>
> +static inline pgprot_t pgprot_nx(pgprot_t prot)
> +{
> + return __pgprot(pgprot_val(prot) | _PAGE_NX);
> +}
> +
> #ifdef CONFIG_X86_PAE
>
> /*
> diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
> index f2741c04289d..42eb242b29f1 100644
> --- a/drivers/pcmcia/electra_cf.c
> +++ b/drivers/pcmcia/electra_cf.c
> @@ -22,6 +22,8 @@
> #include <linux/of_platform.h>
> #include <linux/slab.h>
>
> +#include <asm/pasemi_dma.h>
> +
> #include <pcmcia/ss.h>
>
> static const char driver_name[] = "electra-cf";
> @@ -204,7 +206,7 @@ static int electra_cf_probe(struct platform_device *ofdev)
> cf->mem_base = ioremap(cf->mem_phys, cf->mem_size);
> cf->io_size = PAGE_ALIGN(resource_size(&io));
>
> - area = __get_vm_area(cf->io_size, 0, PHB_IO_BASE, PHB_IO_END);
> + area = get_phb_area(cf->io_size, 0);
> if (area == NULL) {
> status = -ENOMEM;
> goto fail1;
> diff --git a/drivers/staging/media/ipu3/ipu3-dmamap.c b/drivers/staging/media/ipu3/ipu3-dmamap.c
> index 7431322379f6..fc3fe85c340c 100644
> --- a/drivers/staging/media/ipu3/ipu3-dmamap.c
> +++ b/drivers/staging/media/ipu3/ipu3-dmamap.c
> @@ -124,13 +124,13 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map,
> }
>
> /* Now grab a virtual region */
> - map->vma = __get_vm_area(size, VM_USERMAP, VMALLOC_START, VMALLOC_END);
> + map->vma = get_vm_area(size, VM_USERMAP);
> if (!map->vma)
> goto out_unmap;
>
> map->vma->pages = pages;
> /* And map it in KVA */
> - if (map_vm_area(map->vma, PAGE_KERNEL, pages))
> + if (map_vm_area_nx(map->vma, PAGE_KERNEL, pages))
> goto out_vunmap;
>
> map->size = size;
> diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
> index e2e2bef07dd2..ee66c1f8b141 100644
> --- a/include/asm-generic/pgtable.h
> +++ b/include/asm-generic/pgtable.h
> @@ -490,6 +490,10 @@ static inline int arch_unmap_one(struct mm_struct *mm,
> #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
> #endif
>
> +#ifndef pgprot_nx
> +#define pgprot_nx(prot) (prot)
> +#endif
> +
> #ifndef pgprot_noncached
> #define pgprot_noncached(prot) (prot)
> #endif
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 6b8eeb0ecee5..a1c5faa6042e 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -2029,7 +2029,6 @@ void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
> {
> vunmap_page_range(addr, addr + size);
> }
> -EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
>
> /**
> * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
> @@ -2047,7 +2046,6 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
> vunmap_page_range(addr, end);
> flush_tlb_kernel_range(addr, end);
> }
> -EXPORT_SYMBOL_GPL(unmap_kernel_range);
>
> int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
> {
> @@ -2059,7 +2057,12 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
>
> return err > 0 ? 0 : err;
> }
> -EXPORT_SYMBOL_GPL(map_vm_area);
> +
> +int map_vm_area_nx(struct vm_struct *area, pgprot prot, struct page **pages)
> +{
> + return map_vm_area(area, pgprot_nx(prot), pages);
> +}
> +EXPORT_SYMBOL_GPL(map_vm_area_nx);
>
> static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
> struct vmap_area *va, unsigned long flags, const void *caller)
> @@ -2133,7 +2136,6 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
> return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
> GFP_KERNEL, __builtin_return_address(0));
> }
> -EXPORT_SYMBOL_GPL(__get_vm_area);
>
> struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
> unsigned long start, unsigned long end,
> @@ -2160,6 +2162,7 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
> NUMA_NO_NODE, GFP_KERNEL,
> __builtin_return_address(0));
> }
> +EXPORT_SYMBOL_GPL(get_vm_area);
>
> struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
> const void *caller)
---end quoted text---