Re: [PATCH 4/9] dma-mapping: move the arm64 non-coherent alloc/free support to common code
From: Russell King - ARM Linux
Date: Tue Dec 04 2018 - 05:10:11 EST
On Mon, Nov 05, 2018 at 01:19:26PM +0100, Christoph Hellwig wrote:
> The arm64 codebase to implement coherent dma allocation for architectures
> with non-coherent DMA is a good start for a generic implementation, given
> that it uses the generic remap helpers, provides the atomic pool for
> allocations that can't sleep and is still relatively simple and well
> tested. Move it to kernel/dma and allow architectures to opt into it
> using a config symbol. Architectures just need to provide a new
> arch_dma_prep_coherent helper to write back and invalidate the caches
> for any memory that gets remapped for uncached access.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
> arch/arm64/Kconfig | 2 +-
> arch/arm64/mm/dma-mapping.c | 184 ++------------------------------
> include/linux/dma-mapping.h | 5 +
> include/linux/dma-noncoherent.h | 2 +
> kernel/dma/Kconfig | 6 ++
> kernel/dma/remap.c | 158 ++++++++++++++++++++++++++-
> 6 files changed, 181 insertions(+), 176 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d065acb6d10..2e645ea693ea 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -82,7 +82,7 @@ config ARM64
> select CRC32
> select DCACHE_WORD_ACCESS
> select DMA_DIRECT_OPS
> - select DMA_REMAP
> + select DMA_DIRECT_REMAP
> select EDAC_SUPPORT
> select FRAME_POINTER
> select GENERIC_ALLOCATOR
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index a3ac26284845..e2e7e5d0f94e 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -33,113 +33,6 @@
>
> #include <asm/cacheflush.h>
>
> -static struct gen_pool *atomic_pool __ro_after_init;
> -
> -#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> -static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> -
> -static int __init early_coherent_pool(char *p)
> -{
> - atomic_pool_size = memparse(p, &p);
> - return 0;
> -}
> -early_param("coherent_pool", early_coherent_pool);
> -
> -static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> -{
> - unsigned long val;
> - void *ptr = NULL;
> -
> - if (!atomic_pool) {
> - WARN(1, "coherent pool not initialised!\n");
> - return NULL;
> - }
> -
> - val = gen_pool_alloc(atomic_pool, size);
> - if (val) {
> - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> -
> - *ret_page = phys_to_page(phys);
> - ptr = (void *)val;
> - memset(ptr, 0, size);
> - }
> -
> - return ptr;
> -}
> -
> -static bool __in_atomic_pool(void *start, size_t size)
> -{
> - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> -}
> -
> -static int __free_from_pool(void *start, size_t size)
> -{
> - if (!__in_atomic_pool(start, size))
> - return 0;
> -
> - gen_pool_free(atomic_pool, (unsigned long)start, size);
> -
> - return 1;
> -}
> -
> -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> - gfp_t flags, unsigned long attrs)
> -{
> - struct page *page;
> - void *ptr, *coherent_ptr;
> - pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
> -
> - size = PAGE_ALIGN(size);
> -
> - if (!gfpflags_allow_blocking(flags)) {
> - struct page *page = NULL;
> - void *addr = __alloc_from_pool(size, &page, flags);
> -
> - if (addr)
> - *dma_handle = phys_to_dma(dev, page_to_phys(page));
> -
> - return addr;
> - }
> -
> - ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> - if (!ptr)
> - goto no_mem;
> -
> - /* remove any dirty cache lines on the kernel alias */
> - __dma_flush_area(ptr, size);
> -
> - /* create a coherent mapping */
> - page = virt_to_page(ptr);
> - coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
> - prot, __builtin_return_address(0));
> - if (!coherent_ptr)
> - goto no_map;
> -
> - return coherent_ptr;
> -
> -no_map:
> - dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
> -no_mem:
> - return NULL;
> -}
> -
> -void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> - dma_addr_t dma_handle, unsigned long attrs)
> -{
> - if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
> - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
> -
> - vunmap(vaddr);
> - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
> - }
> -}
> -
> -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
> - dma_addr_t dma_addr)
> -{
> - return __phys_to_pfn(dma_to_phys(dev, dma_addr));
> -}
> -
> pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
> unsigned long attrs)
> {
> @@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
> __dma_unmap_area(phys_to_virt(paddr), size, dir);
> }
>
> +void arch_dma_prep_coherent(struct page *page, size_t size)
> +{
> + __dma_flush_area(page_address(page), size);
> +}
> +
> #ifdef CONFIG_IOMMU_DMA
> static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
> struct page *page, size_t size)
> @@ -191,67 +89,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
> }
> #endif /* CONFIG_IOMMU_DMA */
>
> -static int __init atomic_pool_init(void)
> -{
> - pgprot_t prot = __pgprot(PROT_NORMAL_NC);
> - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> - struct page *page;
> - void *addr;
> - unsigned int pool_size_order = get_order(atomic_pool_size);
> -
> - if (dev_get_cma_area(NULL))
> - page = dma_alloc_from_contiguous(NULL, nr_pages,
> - pool_size_order, false);
> - else
> - page = alloc_pages(GFP_DMA32, pool_size_order);
> -
> - if (page) {
> - int ret;
> - void *page_addr = page_address(page);
> -
> - memset(page_addr, 0, atomic_pool_size);
> - __dma_flush_area(page_addr, atomic_pool_size);
> -
> - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!atomic_pool)
> - goto free_page;
> -
> - addr = dma_common_contiguous_remap(page, atomic_pool_size,
> - VM_USERMAP, prot, atomic_pool_init);
> -
> - if (!addr)
> - goto destroy_genpool;
> -
> - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> - page_to_phys(page),
> - atomic_pool_size, -1);
> - if (ret)
> - goto remove_mapping;
> -
> - gen_pool_set_algo(atomic_pool,
> - gen_pool_first_fit_order_align,
> - NULL);
> -
> - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> - atomic_pool_size / 1024);
> - return 0;
> - }
> - goto out;
> -
> -remove_mapping:
> - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
> -destroy_genpool:
> - gen_pool_destroy(atomic_pool);
> - atomic_pool = NULL;
> -free_page:
> - if (!dma_release_from_contiguous(NULL, page, nr_pages))
> - __free_pages(page, pool_size_order);
> -out:
> - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> - atomic_pool_size / 1024);
> - return -ENOMEM;
> -}
> -
> /********************************************
> * The following APIs are for dummy DMA ops *
> ********************************************/
> @@ -350,8 +187,7 @@ static int __init arm64_dma_init(void)
> TAINT_CPU_OUT_OF_SPEC,
> "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
> ARCH_DMA_MINALIGN, cache_line_size());
> -
> - return atomic_pool_init();
> + return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
> }
> arch_initcall(arm64_dma_init);
>
> @@ -397,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> page = alloc_pages(gfp, get_order(size));
> addr = page ? page_address(page) : NULL;
> } else {
> - addr = __alloc_from_pool(size, &page, gfp);
> + addr = dma_alloc_from_pool(size, &page, gfp);
> }
> if (!addr)
> return NULL;
> @@ -407,7 +243,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> if (coherent)
> __free_pages(page, get_order(size));
> else
> - __free_from_pool(addr, size);
> + dma_free_from_pool(addr, size);
> addr = NULL;
> }
> } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> @@ -471,9 +307,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
> * coherent devices.
> * Hence how dodgy the below logic looks...
> */
> - if (__in_atomic_pool(cpu_addr, size)) {
> + if (dma_in_atomic_pool(cpu_addr, size)) {
> iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
> - __free_from_pool(cpu_addr, size);
> + dma_free_from_pool(cpu_addr, size);
> } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> struct page *page = vmalloc_to_page(cpu_addr);
>
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 15bd41447025..56ed94b99963 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -455,6 +455,11 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
> const void *caller);
> void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
>
> +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
> +bool dma_in_atomic_pool(void *start, size_t size);
> +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
> +bool dma_free_from_pool(void *start, size_t size);
> +
> /**
> * dma_mmap_attrs - map a coherent DMA allocation into user space
> * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
> diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
> index 9051b055beec..306557331d7d 100644
> --- a/include/linux/dma-noncoherent.h
> +++ b/include/linux/dma-noncoherent.h
> @@ -69,4 +69,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev)
> }
> #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */
>
> +void arch_dma_prep_coherent(struct page *page, size_t size);
> +
> #endif /* _LINUX_DMA_NONCOHERENT_H */
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index c92e08173ed8..fb045ebb0713 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -55,3 +55,9 @@ config SWIOTLB
> config DMA_REMAP
> depends on MMU
> bool
> +
> +config DMA_DIRECT_REMAP
> + bool
> + depends on DMA_DIRECT_OPS
> + select DMA_REMAP
> +
> diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
> index 456f7cc3414d..bc42766f52df 100644
> --- a/kernel/dma/remap.c
> +++ b/kernel/dma/remap.c
> @@ -1,8 +1,13 @@
> // SPDX-License-Identifier: GPL-2.0
> /*
> + * Copyright (C) 2012 ARM Ltd.
> * Copyright (c) 2014 The Linux Foundation
> */
> -#include <linux/dma-mapping.h>
> +#include <linux/dma-direct.h>
> +#include <linux/dma-noncoherent.h>
> +#include <linux/dma-contiguous.h>
> +#include <linux/init.h>
> +#include <linux/genalloc.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
>
> @@ -86,3 +91,154 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
> unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
> vunmap(cpu_addr);
> }
> +
> +#ifdef CONFIG_DMA_DIRECT_REMAP
> +static struct gen_pool *atomic_pool __ro_after_init;
> +
> +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
> +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
> +
> +static int __init early_coherent_pool(char *p)
> +{
> + atomic_pool_size = memparse(p, &p);
> + return 0;
> +}
> +early_param("coherent_pool", early_coherent_pool);
> +
> +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
> +{
> + unsigned int pool_size_order = get_order(atomic_pool_size);
> + unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
> + struct page *page;
> + void *addr;
> + int ret;
> +
> + if (dev_get_cma_area(NULL))
> + page = dma_alloc_from_contiguous(NULL, nr_pages,
> + pool_size_order, false);
> + else
> + page = alloc_pages(gfp, pool_size_order);
> + if (!page)
> + goto out;
> +
> + memset(page_address(page), 0, atomic_pool_size);
Note that this won't work if 'page' is a highmem page, since
page_address() only gives a usable address for pages that have a kernel
mapping - should there be a check for that, or a check for the gfp flags?
Also, is this memset() actually useful, or just a waste of cycles? When
we allocate from this pool (see dma_alloc_from_pool()), we always
memset() the buffer anyway.
> + arch_dma_prep_coherent(page, atomic_pool_size);
> +
> + atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> + if (!atomic_pool)
> + goto free_page;
> +
> + addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
> + prot, __builtin_return_address(0));
> + if (!addr)
> + goto destroy_genpool;
> +
> + ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
> + page_to_phys(page), atomic_pool_size, -1);
> + if (ret)
> + goto remove_mapping;
> + gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> +
> + pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
> + atomic_pool_size / 1024);
> + return 0;
> +
> +remove_mapping:
> + dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
> +destroy_genpool:
> + gen_pool_destroy(atomic_pool);
> + atomic_pool = NULL;
> +free_page:
> + if (!dma_release_from_contiguous(NULL, page, nr_pages))
> + __free_pages(page, pool_size_order);
> +out:
> + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
> + atomic_pool_size / 1024);
> + return -ENOMEM;
> +}
> +
> +bool dma_in_atomic_pool(void *start, size_t size)
> +{
> + return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
> +}
> +
> +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
> +{
> + unsigned long val;
> + void *ptr = NULL;
> +
> + if (!atomic_pool) {
> + WARN(1, "coherent pool not initialised!\n");
> + return NULL;
> + }
> +
> + val = gen_pool_alloc(atomic_pool, size);
> + if (val) {
> + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
> +
> + *ret_page = phys_to_page(phys);
> + ptr = (void *)val;
> + memset(ptr, 0, size);
> + }
> +
> + return ptr;
> +}
> +
> +bool dma_free_from_pool(void *start, size_t size)
> +{
> + if (!dma_in_atomic_pool(start, size))
> + return false;
> + gen_pool_free(atomic_pool, (unsigned long)start, size);
> + return true;
> +}
> +
> +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
> + gfp_t flags, unsigned long attrs)
> +{
> + struct page *page = NULL;
> + void *ret, *kaddr;
> +
> + size = PAGE_ALIGN(size);
> +
> + if (!gfpflags_allow_blocking(flags)) {
> + ret = dma_alloc_from_pool(size, &page, flags);
> + if (!ret)
> + return NULL;
> + *dma_handle = phys_to_dma(dev, page_to_phys(page));
> + return ret;
> + }
> +
> + kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
> + if (!kaddr)
> + return NULL;
> + page = virt_to_page(kaddr);
> +
> + /* remove any dirty cache lines on the kernel alias */
> + arch_dma_prep_coherent(page, size);
> +
> + /* create a coherent mapping */
> + ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
> + arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
> + __builtin_return_address(0));
> + if (!ret)
> + dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
> + return ret;
> +}
> +
> +void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> + dma_addr_t dma_handle, unsigned long attrs)
> +{
> + if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
> + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
> +
> + vunmap(vaddr);
> + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
> + }
> +}
> +
> +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
> + dma_addr_t dma_addr)
> +{
> + return __phys_to_pfn(dma_to_phys(dev, dma_addr));
> +}
> +#endif /* CONFIG_DMA_DIRECT_REMAP */
> --
> 2.19.1
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up