Re: [PATCH] iommu/dma: Add support for DMA_ATTR_FORCE_CONTIGUOUS
From: Robin Murphy
Date: Fri Jan 13 2017 - 06:41:46 EST
On 13/01/17 11:07, Geert Uytterhoeven wrote:
> Add support for DMA_ATTR_FORCE_CONTIGUOUS to the generic IOMMU DMA code.
> This allows to allocate physically contiguous DMA buffers on arm64
> systems with an IOMMU.
Can anyone explain what this attribute is actually used for? I've never
quite figured it out.
> Note that as this uses the CMA allocator, setting this attribute has a
> runtime-dependency on CONFIG_DMA_CMA, just like on arm32.
>
> For arm64 systems using swiotlb, no changes are needed to support the
> allocation of physically contiguous DMA buffers:
> - swiotlb always uses physically contiguous buffers (up to
> IO_TLB_SEGSIZE = 128 pages),
> - arm64's __dma_alloc_coherent() already calls
> dma_alloc_from_contiguous() when CMA is available.
>
> Signed-off-by: Geert Uytterhoeven <geert+renesas@xxxxxxxxx>
> ---
> arch/arm64/mm/dma-mapping.c | 4 ++--
> drivers/iommu/dma-iommu.c | 44 ++++++++++++++++++++++++++++++++++----------
> include/linux/dma-iommu.h | 2 +-
> 3 files changed, 37 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 1d7d5d2881db7c19..5fba14a21163e41f 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -589,7 +589,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
> addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
> __builtin_return_address(0));
> if (!addr)
> - iommu_dma_free(dev, pages, iosize, handle);
> + iommu_dma_free(dev, pages, iosize, handle, attrs);
> } else {
> struct page *page;
> /*
> @@ -642,7 +642,7 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
>
> if (WARN_ON(!area || !area->pages))
> return;
> - iommu_dma_free(dev, area->pages, iosize, &handle);
> + iommu_dma_free(dev, area->pages, iosize, &handle, attrs);
> dma_common_free_remap(cpu_addr, size, VM_USERMAP);
> } else {
> iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 2db0d641cf4505b5..b991e8dcbbbb35c5 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -30,6 +30,7 @@
> #include <linux/pci.h>
> #include <linux/scatterlist.h>
> #include <linux/vmalloc.h>
> +#include <linux/dma-contiguous.h>
>
> struct iommu_dma_msi_page {
> struct list_head list;
> @@ -238,15 +239,21 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
> __free_iova(iovad, iova);
> }
>
> -static void __iommu_dma_free_pages(struct page **pages, int count)
> +static void __iommu_dma_free_pages(struct device *dev, struct page **pages,
> + int count, unsigned long attrs)
> {
> - while (count--)
> - __free_page(pages[count]);
> + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> + dma_release_from_contiguous(dev, pages[0], count);
> + } else {
> + while (count--)
> + __free_page(pages[count]);
> + }
> kvfree(pages);
> }
>
> -static struct page **__iommu_dma_alloc_pages(unsigned int count,
> - unsigned long order_mask, gfp_t gfp)
> +static struct page **__iommu_dma_alloc_pages(struct device *dev,
> + unsigned int count, unsigned long order_mask, gfp_t gfp,
> + unsigned long attrs)
> {
> struct page **pages;
> unsigned int i = 0, array_size = count * sizeof(*pages);
> @@ -265,6 +272,20 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
> /* IOMMU can map any pages, so himem can also be used here */
> gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
>
> + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> + int order = get_order(count << PAGE_SHIFT);
> + struct page *page;
> +
> + page = dma_alloc_from_contiguous(dev, count, order);
> + if (!page)
> + return NULL;
> +
> + while (count--)
> + pages[i++] = page++;
> +
> + return pages;
> + }
> +
This is really yuck. Plus it's entirely pointless to go through the
whole page array/scatterlist dance when we know the buffer is going to
be physically contiguous - it should just be allocate, map, done. I'd
much rather see standalone iommu_dma_{alloc,free}_contiguous()
functions, and let the arch code handle dispatching appropriately.
Robin.
> while (count) {
> struct page *page = NULL;
> unsigned int order_size;
> @@ -294,7 +315,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
> __free_pages(page, order);
> }
> if (!page) {
> - __iommu_dma_free_pages(pages, i);
> + __iommu_dma_free_pages(dev, pages, i, attrs);
> return NULL;
> }
> count -= order_size;
> @@ -310,15 +331,17 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
> * @pages: Array of buffer pages as returned by iommu_dma_alloc()
> * @size: Size of buffer in bytes
> * @handle: DMA address of buffer
> + * @attrs: DMA attributes used for allocation of this buffer
> *
> * Frees both the pages associated with the buffer, and the array
> * describing them
> */
> void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
> - dma_addr_t *handle)
> + dma_addr_t *handle, unsigned long attrs)
> {
> __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
> - __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
> + __iommu_dma_free_pages(dev, pages, PAGE_ALIGN(size) >> PAGE_SHIFT,
> + attrs);
> *handle = DMA_ERROR_CODE;
> }
>
> @@ -365,7 +388,8 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> alloc_sizes = min_size;
>
> count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> - pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp);
> + pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
> + gfp, attrs);
> if (!pages)
> return NULL;
>
> @@ -403,7 +427,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> out_free_iova:
> __free_iova(iovad, iova);
> out_free_pages:
> - __iommu_dma_free_pages(pages, count);
> + __iommu_dma_free_pages(dev, pages, count, attrs);
> return NULL;
> }
>
> diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
> index 7f7e9a7e3839966c..35fa6b7f9bac9b35 100644
> --- a/include/linux/dma-iommu.h
> +++ b/include/linux/dma-iommu.h
> @@ -44,7 +44,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
> unsigned long attrs, int prot, dma_addr_t *handle,
> void (*flush_page)(struct device *, const void *, phys_addr_t));
> void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
> - dma_addr_t *handle);
> + dma_addr_t *handle, unsigned long attrs);
>
> int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma);
>
>