[PATCH 20/21] ARM: dma-mapping: split out arch_dma_mark_clean() helper

From: Arnd Bergmann
Date: Mon Mar 27 2023 - 08:19:07 EST


From: Arnd Bergmann <arnd@xxxxxxxx>

The arm version of the arch_sync_dma_for_cpu() function annotates pages as
PG_dcache_clean after a DMA, but no other architecture does this here. On
ia64, the same thing is done in arch_dma_mark_clean(), so it makes sense
to use the same hook in order to have identical arch_sync_dma_for_cpu()
semantics as all other architectures.

Splitting this out has multiple effects:

- for dma-direct, this now gets called after arch_sync_dma_for_cpu()
for DMA_FROM_DEVICE mappings, but not for DMA_BIDIRECTIONAL. While
it would not be harmful to keep doing it for bidirectional mappings,
those are apparently not used in any callers that care about the flag.

- Since arm has its own dma-iommu abstraction, this now also needs to
call the same function, so the calls are added there to mirror the
dma-direct version.

- Like dma-direct, the dma-iommu version now marks the dcache clean
for both coherent and noncoherent devices after a DMA, but it only
does this for DMA_FROM_DEVICE, not DMA_BIDIRECTIONAL.

[ HELP NEEDED: can anyone confirm that it is a correct assumption
on arm that a cache-coherent device writing to a page always results
in it being in a PG_dcache_clean state like on ia64, or can a device
write directly into the dcache?]

Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx>
---
arch/arm/Kconfig | 1 +
arch/arm/mm/dma-mapping.c | 71 +++++++++++++++++++++++----------------
2 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e24a9820e12f..125d58c54ab1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,7 @@ config ARM
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
+ select ARCH_HAS_DMA_MARK_CLEAN if MMU
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index cc702cb27ae7..b703cb83d27e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -665,6 +665,28 @@ static void dma_cache_maint(phys_addr_t paddr,
} while (left);
}

+/*
+ * Mark pages wholly within the range as D-cache clean to avoid extra flushing.
+ */
+void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
+{
+ unsigned long pfn = PFN_UP(paddr);
+ unsigned long off = paddr & (PAGE_SIZE - 1);
+ size_t left = size;
+
+ if (size < PAGE_SIZE)
+ return;
+
+ if (off)
+ left -= PAGE_SIZE - off;	/* skip partial first page */
+
+ while (left >= PAGE_SIZE) {
+ struct page *page = pfn_to_page(pfn++);
+ set_bit(PG_dcache_clean, &page->flags);
+ left -= PAGE_SIZE;
+ }
+}
+
static bool arch_sync_dma_cpu_needs_post_dma_flush(void)
{
if (IS_ENABLED(CONFIG_CPU_V6) ||
@@ -715,24 +737,6 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
outer_inv_range(paddr, paddr + size);
dma_cache_maint(paddr, size, dmac_inv_range);
}
-
- /*
- * Mark the D-cache clean for these pages to avoid extra flushing.
- */
- if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
- unsigned long pfn = PFN_UP(paddr);
- unsigned long off = paddr & (PAGE_SIZE - 1);
- size_t left = size;
-
- if (off)
- left -= PAGE_SIZE - off;
-
- while (left >= PAGE_SIZE) {
- struct page *page = pfn_to_page(pfn++);
- set_bit(PG_dcache_clean, &page->flags);
- left -= PAGE_SIZE;
- }
- }
}

#ifdef CONFIG_ARM_DMA_USE_IOMMU
@@ -1294,6 +1298,17 @@ static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
return -EINVAL;
}

+/* Sync for the CPU if non-coherent; mark pages clean after DMA_FROM_DEVICE. */
+static void arm_iommu_sync_dma_for_cpu(phys_addr_t phys, size_t len,
+ enum dma_data_direction dir, bool dma_coherent)
+{
+ if (!dma_coherent)
+ arch_sync_dma_for_cpu(phys, len, dir);
+
+ if (dir == DMA_FROM_DEVICE)
+ arch_dma_mark_clean(phys, len);
+}
+
/**
* arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
* @dev: valid struct device pointer
@@ -1316,8 +1331,9 @@ static void arm_iommu_unmap_sg(struct device *dev,
if (sg_dma_len(s))
__iommu_remove_mapping(dev, sg_dma_address(s),
sg_dma_len(s));
- if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- arch_sync_dma_for_cpu(sg_phys(s), s->length, dir);
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arm_iommu_sync_dma_for_cpu(sg_phys(s), s->length, dir,
+ dev->dma_coherent);
}
}

@@ -1335,12 +1351,9 @@ static void arm_iommu_sync_sg_for_cpu(struct device *dev,
struct scatterlist *s;
int i;

- if (dev->dma_coherent)
- return;
-
for_each_sg(sg, s, nents, i)
- arch_sync_dma_for_cpu(sg_phys(s), s->length, dir);
-
+ arm_iommu_sync_dma_for_cpu(sg_phys(s), s->length, dir,
+ dev->dma_coherent);
}

/**
@@ -1425,9 +1438,9 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
if (!iova)
return;

- if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
phys = iommu_iova_to_phys(mapping->domain, handle);
- arch_sync_dma_for_cpu(phys, size, dir);
+ arm_iommu_sync_dma_for_cpu(phys, size, dir, dev->dma_coherent);
}

iommu_unmap(mapping->domain, iova, len);
@@ -1497,11 +1510,11 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
phys_addr_t phys;

- if (dev->dma_coherent || !(handle & PAGE_MASK))
+ if (!(handle & PAGE_MASK))
return;

phys = iommu_iova_to_phys(mapping->domain, handle);
- arch_sync_dma_for_cpu(phys, size, dir);
+ arm_iommu_sync_dma_for_cpu(phys, size, dir, dev->dma_coherent);
}

static void arm_iommu_sync_single_for_device(struct device *dev,
--
2.39.2