Re: [PATCH 21/21] dma-mapping: replace custom code with generic implementation

From: Lad, Prabhakar
Date: Thu Mar 30 2023 - 10:06:50 EST


On Mon, Mar 27, 2023 at 1:20 PM Arnd Bergmann <arnd@xxxxxxxxxx> wrote:
>
> From: Arnd Bergmann <arnd@xxxxxxxx>
>
> Now that all of these have consistent behavior, replace them with
> a single shared implementation of arch_sync_dma_for_device() and
> arch_sync_dma_for_cpu() and three parameters to pick how they should
> operate:
>
> - If the CPU has speculative prefetching, then the cache
> has to be invalidated after a transfer from the device.
> On the rarer CPUs without prefetching, this can be skipped,
> with all cache management happening before the transfer.
> This flag can be runtime detected, but is usually fixed
> per architecture.
>
> - Some architectures currently clean the caches before DMA
> from a device, while others invalidate it. There has not
> been a conclusion regarding whether we should change all
> architectures to use clean instead, so this adds an
> architecture specific flag that we can change later on.
>
> - On 32-bit Arm, the arch_sync_dma_for_cpu() function keeps
> track of pages that are marked clean in the page cache, to
> avoid flushing them again. The implementation for this is
> generic enough to work on all architectures that use the
> PG_dcache_clean page flag, but a Kconfig symbol is used
> to only enable it on Arm to preserve the existing behavior.
>
> For the function naming, I picked 'wback' over 'clean', and 'wback_inv'
> over 'flush', to avoid any ambiguity of what the helper functions are
> supposed to do.
>
> Moving the global functions into a header file is usually a bad idea
> as it prevents the header from being included more than once, but it
> helps keep the behavior as close as possible to the previous state,
> including the possibility of inlining most of it into these functions
> where that was done before. This also helps keep the global namespace
> clean, by hiding the new arch_dma_cache{_wback,_inv,_wback_inv} from
> device drivers that might use them incorrectly.
>
> It would be possible to do this one architecture at a time, but
> as the change is the same everywhere, the combined patch helps
> explain it better all at once.
>
> Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx>
> ---
> arch/arc/mm/dma.c | 66 +++++-------------
> arch/arm/Kconfig | 3 +
> arch/arm/mm/dma-mapping-nommu.c | 39 ++++++-----
> arch/arm/mm/dma-mapping.c | 64 +++++++-----------
> arch/arm64/mm/dma-mapping.c | 28 +++++---
> arch/csky/mm/dma-mapping.c | 44 ++++++------
> arch/hexagon/kernel/dma.c | 44 ++++++------
> arch/m68k/kernel/dma.c | 43 +++++++-----
> arch/microblaze/kernel/dma.c | 48 +++++++-------
> arch/mips/mm/dma-noncoherent.c | 60 +++++++----------
> arch/nios2/mm/dma-mapping.c | 57 +++++++---------
> arch/openrisc/kernel/dma.c | 63 +++++++++++-------
> arch/parisc/kernel/pci-dma.c | 46 ++++++-------
> arch/powerpc/mm/dma-noncoherent.c | 34 ++++++----
> arch/riscv/mm/dma-noncoherent.c | 51 +++++++-------
> arch/sh/kernel/dma-coherent.c | 43 +++++++-----
> arch/sparc/kernel/ioport.c | 38 ++++++++---
> arch/xtensa/kernel/pci-dma.c | 40 ++++++-----
> include/linux/dma-sync.h | 107 ++++++++++++++++++++++++++++++
> 19 files changed, 527 insertions(+), 391 deletions(-)
> create mode 100644 include/linux/dma-sync.h
>
I tested this on RZ/Five (with my v6 [0] + additional changes) so for RISC-V,

Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@xxxxxxxxxxxxxx>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@xxxxxxxxxxxxxx>

[0] https://patchwork.kernel.org/project/linux-renesas-soc/cover/20230106185526.260163-1-prabhakar.mahadev-lad.rj@xxxxxxxxxxxxxx/

Cheers,
Prabhakar

> diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
> index ddb96786f765..61cd01646222 100644
> --- a/arch/arc/mm/dma.c
> +++ b/arch/arc/mm/dma.c
> @@ -30,63 +30,33 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
> dma_cache_wback_inv(page_to_phys(page), size);
> }
>
> -/*
> - * Cache operations depending on function and direction argument, inspired by
> - * https://lore.kernel.org/lkml/20180518175004.GF17671@xxxxxxxxxxxxxxxxxxxxx
> - * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
> - * dma-mapping: provide a generic dma-noncoherent implementation)"
> - *
> - * | map == for_device | unmap == for_cpu
> - * |----------------------------------------------------------------
> - * TO_DEV | writeback writeback | none none
> - * FROM_DEV | invalidate invalidate | invalidate* invalidate*
> - * BIDIR | writeback writeback | invalidate invalidate
> - *
> - * [*] needed for CPU speculative prefetches
> - *
> - * NOTE: we don't check the validity of direction argument as it is done in
> - * upper layer functions (in include/linux/dma-mapping.h)
> - */
> -
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - dma_cache_wback(paddr, size);
> - break;
> -
> - case DMA_FROM_DEVICE:
> - dma_cache_inv(paddr, size);
> - break;
> -
> - case DMA_BIDIRECTIONAL:
> - dma_cache_wback(paddr, size);
> - break;
> + dma_cache_wback(paddr, size);
> +}
>
> - default:
> - break;
> - }
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + dma_cache_inv(paddr, size);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - break;
> + dma_cache_wback_inv(paddr, size);
> +}
>
> - /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - dma_cache_inv(paddr, size);
> - break;
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
>
> - default:
> - break;
> - }
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> }
>
> +#include <linux/dma-sync.h>
> +
> /*
> * Plug in direct dma map ops.
> */
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 125d58c54ab1..0de84e861027 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -212,6 +212,9 @@ config LOCKDEP_SUPPORT
> bool
> default y
>
> +config ARCH_DMA_MARK_DCACHE_CLEAN
> + def_bool y
> +
> config ARCH_HAS_ILOG2_U32
> bool
>
> diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
> index 12b5c6ae93fc..0817274aed15 100644
> --- a/arch/arm/mm/dma-mapping-nommu.c
> +++ b/arch/arm/mm/dma-mapping-nommu.c
> @@ -13,27 +13,36 @@
>
> #include "dma.h"
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - if (dir == DMA_FROM_DEVICE) {
> - dmac_inv_range(__va(paddr), __va(paddr + size));
> - outer_inv_range(paddr, paddr + size);
> - } else {
> - dmac_clean_range(__va(paddr), __va(paddr + size));
> - outer_clean_range(paddr, paddr + size);
> - }
> + dmac_clean_range(__va(paddr), __va(paddr + size));
> + outer_clean_range(paddr, paddr + size);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> - if (dir != DMA_TO_DEVICE) {
> - outer_inv_range(paddr, paddr + size);
> - dmac_inv_range(__va(paddr), __va(paddr));
> - }
> + dmac_inv_range(__va(paddr), __va(paddr + size));
> + outer_inv_range(paddr, paddr + size);
> }
>
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + dmac_flush_range(__va(paddr), __va(paddr + size));
> + outer_flush_range(paddr, paddr + size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> +}
> +
> +#include <linux/dma-sync.h>
> +
> void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> const struct iommu_ops *iommu, bool coherent)
> {
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index b703cb83d27e..aa6ee820a0ab 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -687,6 +687,30 @@ void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
> }
> }
>
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> +{
> + dma_cache_maint(paddr, size, dmac_clean_range);
> + outer_clean_range(paddr, paddr + size);
> +}
> +
> +
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + dma_cache_maint(paddr, size, dmac_inv_range);
> + outer_inv_range(paddr, paddr + size);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + dma_cache_maint(paddr, size, dmac_flush_range);
> + outer_flush_range(paddr, paddr + size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> static bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> {
> if (IS_ENABLED(CONFIG_CPU_V6) ||
> @@ -699,45 +723,7 @@ static bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> return false;
> }
>
> -/*
> - * Make an area consistent for devices.
> - * Note: Drivers should NOT use this function directly.
> - * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
> - */
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> -{
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - dma_cache_maint(paddr, size, dmac_clean_range);
> - outer_clean_range(paddr, paddr + size);
> - break;
> - case DMA_FROM_DEVICE:
> - dma_cache_maint(paddr, size, dmac_inv_range);
> - outer_inv_range(paddr, paddr + size);
> - break;
> - case DMA_BIDIRECTIONAL:
> - if (arch_sync_dma_cpu_needs_post_dma_flush()) {
> - dma_cache_maint(paddr, size, dmac_clean_range);
> - outer_clean_range(paddr, paddr + size);
> - } else {
> - dma_cache_maint(paddr, size, dmac_flush_range);
> - outer_flush_range(paddr, paddr + size);
> - }
> - break;
> - default:
> - break;
> - }
> -}
> -
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> -{
> - if (dir != DMA_TO_DEVICE && arch_sync_dma_cpu_needs_post_dma_flush()) {
> - outer_inv_range(paddr, paddr + size);
> - dma_cache_maint(paddr, size, dmac_inv_range);
> - }
> -}
> +#include <linux/dma-sync.h>
>
> #ifdef CONFIG_ARM_DMA_USE_IOMMU
>
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 5240f6acad64..bae741aa65e9 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -13,25 +13,33 @@
> #include <asm/cacheflush.h>
> #include <asm/xen/xen-ops.h>
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - unsigned long start = (unsigned long)phys_to_virt(paddr);
> + dcache_clean_poc(paddr, paddr + size);
> +}
>
> - dcache_clean_poc(start, start + size);
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + dcache_inval_poc(paddr, paddr + size);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> {
> - unsigned long start = (unsigned long)phys_to_virt(paddr);
> + dcache_clean_inval_poc(paddr, paddr + size);
> +}
>
> - if (dir == DMA_TO_DEVICE)
> - return;
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return true;
> +}
>
> - dcache_inval_poc(start, start + size);
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> }
>
> +#include <linux/dma-sync.h>
> +
> void arch_dma_prep_coherent(struct page *page, size_t size)
> {
> unsigned long start = (unsigned long)page_address(page);
> diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
> index c90f912e2822..9402e101b363 100644
> --- a/arch/csky/mm/dma-mapping.c
> +++ b/arch/csky/mm/dma-mapping.c
> @@ -55,31 +55,29 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
> cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range);
> }
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - cache_op(paddr, size, dma_wb_range);
> - break;
> - default:
> - BUG();
> - }
> + cache_op(paddr, size, dma_wb_range);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - return;
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - cache_op(paddr, size, dma_inv_range);
> - break;
> - default:
> - BUG();
> - }
> + cache_op(paddr, size, dma_inv_range);
> }
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + cache_op(paddr, size, dma_wbinv_range);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return true;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> +}
> +
> +#include <linux/dma-sync.h>
> diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
> index 882680e81a30..e6538128a75b 100644
> --- a/arch/hexagon/kernel/dma.c
> +++ b/arch/hexagon/kernel/dma.c
> @@ -9,29 +9,33 @@
> #include <linux/memblock.h>
> #include <asm/page.h>
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - void *addr = phys_to_virt(paddr);
> -
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - hexagon_clean_dcache_range((unsigned long) addr,
> - (unsigned long) addr + size);
> - break;
> - case DMA_FROM_DEVICE:
> - hexagon_inv_dcache_range((unsigned long) addr,
> - (unsigned long) addr + size);
> - break;
> - case DMA_BIDIRECTIONAL:
> - flush_dcache_range((unsigned long) addr,
> - (unsigned long) addr + size);
> - break;
> - default:
> - BUG();
> - }
> + hexagon_clean_dcache_range(paddr, paddr + size);
> }
>
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + hexagon_inv_dcache_range(paddr, paddr + size);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + hexagon_flush_dcache_range(paddr, paddr + size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return false;
> +}
> +
> +#include <linux/dma-sync.h>
> +
> /*
> * Our max_low_pfn should have been backed off by 16MB in mm/init.c to create
> * DMA coherent space. Use that for the pool.
> diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
> index 2e192a5df949..aa9b434e6df8 100644
> --- a/arch/m68k/kernel/dma.c
> +++ b/arch/m68k/kernel/dma.c
> @@ -58,20 +58,33 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
>
> #endif /* CONFIG_MMU && !CONFIG_COLDFIRE */
>
> -void arch_sync_dma_for_device(phys_addr_t handle, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_BIDIRECTIONAL:
> - case DMA_TO_DEVICE:
> - cache_push(handle, size);
> - break;
> - case DMA_FROM_DEVICE:
> - cache_clear(handle, size);
> - break;
> - default:
> - pr_err_ratelimited("dma_sync_single_for_device: unsupported dir %u\n",
> - dir);
> - break;
> - }
> + /*
> + * cache_push() always invalidates in addition to cleaning
> + * write-back caches.
> + */
> + cache_push(paddr, size);
> +}
> +
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + cache_clear(paddr, size);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + cache_push(paddr, size);
> }
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return false;
> +}
> +
> +#include <linux/dma-sync.h>
> diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
> index b4c4e45fd45e..01110d4aa5b0 100644
> --- a/arch/microblaze/kernel/dma.c
> +++ b/arch/microblaze/kernel/dma.c
> @@ -14,32 +14,30 @@
> #include <linux/bug.h>
> #include <asm/cacheflush.h>
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - switch (direction) {
> - case DMA_TO_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - flush_dcache_range(paddr, paddr + size);
> - break;
> - case DMA_FROM_DEVICE:
> - invalidate_dcache_range(paddr, paddr + size);
> - break;
> - default:
> - BUG();
> - }
> + /* writeback plus invalidate, could be a nop on WT caches */
> + flush_dcache_range(paddr, paddr + size);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> - switch (direction) {
> - case DMA_TO_DEVICE:
> - break;
> - case DMA_BIDIRECTIONAL:
> - case DMA_FROM_DEVICE:
> - invalidate_dcache_range(paddr, paddr + size);
> - break;
> - default:
> - BUG();
> - }}
> + invalidate_dcache_range(paddr, paddr + size);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + flush_dcache_range(paddr, paddr + size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> +}
> +
> +#include <linux/dma-sync.h>
> diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
> index b9d68bcc5d53..902d4b7c1f85 100644
> --- a/arch/mips/mm/dma-noncoherent.c
> +++ b/arch/mips/mm/dma-noncoherent.c
> @@ -85,50 +85,38 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
> } while (left);
> }
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - dma_sync_phys(paddr, size, _dma_cache_wback);
> - break;
> - case DMA_FROM_DEVICE:
> - dma_sync_phys(paddr, size, _dma_cache_inv);
> - break;
> - case DMA_BIDIRECTIONAL:
> - if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
> - cpu_needs_post_dma_flush())
> - dma_sync_phys(paddr, size, _dma_cache_wback);
> - else
> - dma_sync_phys(paddr, size, _dma_cache_wback_inv);
> - break;
> - default:
> - break;
> - }
> + dma_sync_phys(paddr, size, _dma_cache_wback);
> }
>
> -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - break;
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - if (cpu_needs_post_dma_flush())
> - dma_sync_phys(paddr, size, _dma_cache_inv);
> - break;
> - default:
> - break;
> - }
> + dma_sync_phys(paddr, size, _dma_cache_inv);
> }
> -#endif
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + dma_sync_phys(paddr, size, _dma_cache_wback_inv);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
> + cpu_needs_post_dma_flush();
> +}
> +
> +#include <linux/dma-sync.h>
>
> #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
> void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
> - const struct iommu_ops *iommu, bool coherent)
> + const struct iommu_ops *iommu, bool coherent)
> {
> - dev->dma_coherent = coherent;
> + dev->dma_coherent = coherent;
> }
> #endif
> diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c
> index fd887d5f3f9a..29978970955e 100644
> --- a/arch/nios2/mm/dma-mapping.c
> +++ b/arch/nios2/mm/dma-mapping.c
> @@ -13,53 +13,46 @@
> #include <linux/types.h>
> #include <linux/mm.h>
> #include <linux/string.h>
> +#include <linux/dma-map-ops.h>
> #include <linux/dma-mapping.h>
> #include <linux/io.h>
> #include <linux/cache.h>
> #include <asm/cacheflush.h>
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> + /*
> + * We just need to write back the caches here, but Nios2 flush
> + * instruction will do both writeback and invalidate.
> + */
> void *vaddr = phys_to_virt(paddr);
> + flush_dcache_range((unsigned long)vaddr, (unsigned long)(vaddr + size));
> +}
>
> - switch (dir) {
> - case DMA_FROM_DEVICE:
> - invalidate_dcache_range((unsigned long)vaddr,
> - (unsigned long)(vaddr + size));
> - break;
> - case DMA_TO_DEVICE:
> - /*
> - * We just need to flush the caches here , but Nios2 flush
> - * instruction will do both writeback and invalidate.
> - */
> - case DMA_BIDIRECTIONAL: /* flush and invalidate */
> - flush_dcache_range((unsigned long)vaddr,
> - (unsigned long)(vaddr + size));
> - break;
> - default:
> - BUG();
> - }
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + unsigned long vaddr = (unsigned long)phys_to_virt(paddr);
> + invalidate_dcache_range(vaddr, (unsigned long)(vaddr + size));
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> {
> void *vaddr = phys_to_virt(paddr);
> + flush_dcache_range((unsigned long)vaddr, (unsigned long)(vaddr + size));
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
>
> - switch (dir) {
> - case DMA_BIDIRECTIONAL:
> - case DMA_FROM_DEVICE:
> - invalidate_dcache_range((unsigned long)vaddr,
> - (unsigned long)(vaddr + size));
> - break;
> - case DMA_TO_DEVICE:
> - break;
> - default:
> - BUG();
> - }
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> }
>
> +#include <linux/dma-sync.h>
> +
> void arch_dma_prep_coherent(struct page *page, size_t size)
> {
> unsigned long start = (unsigned long)page_address(page);
> diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
> index 91a00d09ffad..aba2258e62eb 100644
> --- a/arch/openrisc/kernel/dma.c
> +++ b/arch/openrisc/kernel/dma.c
> @@ -95,32 +95,47 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)
> mmap_write_unlock(&init_mm);
> }
>
> -void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> unsigned long cl;
> struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
>
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - /* Write back the dcache for the requested range */
> - for (cl = addr; cl < addr + size;
> - cl += cpuinfo->dcache_block_size)
> - mtspr(SPR_DCBWR, cl);
> - break;
> - case DMA_FROM_DEVICE:
> - /* Invalidate the dcache for the requested range */
> - for (cl = addr; cl < addr + size;
> - cl += cpuinfo->dcache_block_size)
> - mtspr(SPR_DCBIR, cl);
> - break;
> - case DMA_BIDIRECTIONAL:
> - /* Flush the dcache for the requested range */
> - for (cl = addr; cl < addr + size;
> - cl += cpuinfo->dcache_block_size)
> - mtspr(SPR_DCBFR, cl);
> - break;
> - default:
> - break;
> - }
> + /* Write back the dcache for the requested range */
> + for (cl = paddr; cl < paddr + size;
> + cl += cpuinfo->dcache_block_size)
> + mtspr(SPR_DCBWR, cl);
> }
> +
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + unsigned long cl;
> + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> +
> + /* Invalidate the dcache for the requested range */
> + for (cl = paddr; cl < paddr + size;
> + cl += cpuinfo->dcache_block_size)
> + mtspr(SPR_DCBIR, cl);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + unsigned long cl;
> + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> +
> + /* Flush the dcache for the requested range */
> + for (cl = paddr; cl < paddr + size;
> + cl += cpuinfo->dcache_block_size)
> + mtspr(SPR_DCBFR, cl);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return false;
> +}
> +
> +#include <linux/dma-sync.h>
> diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
> index 6d3d3cffb316..a7955aab8ce2 100644
> --- a/arch/parisc/kernel/pci-dma.c
> +++ b/arch/parisc/kernel/pci-dma.c
> @@ -443,35 +443,35 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
> free_pages((unsigned long)__va(dma_handle), order);
> }
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> unsigned long virt = (unsigned long)phys_to_virt(paddr);
>
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - clean_kernel_dcache_range(virt, size);
> - break;
> - case DMA_FROM_DEVICE:
> - clean_kernel_dcache_range(virt, size);
> - break;
> - case DMA_BIDIRECTIONAL:
> - flush_kernel_dcache_range(virt, size);
> - break;
> - }
> + clean_kernel_dcache_range(virt, size);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> unsigned long virt = (unsigned long)phys_to_virt(paddr);
>
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - break;
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - purge_kernel_dcache_range(virt, size);
> - break;
> - }
> + purge_kernel_dcache_range(virt, size);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + unsigned long virt = (unsigned long)phys_to_virt(paddr);
> +
> + flush_kernel_dcache_range(virt, size);
> }
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return true;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> +}
> +
> +#include <linux/dma-sync.h>
> diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
> index 00e59a4faa2b..268510c71156 100644
> --- a/arch/powerpc/mm/dma-noncoherent.c
> +++ b/arch/powerpc/mm/dma-noncoherent.c
> @@ -101,27 +101,33 @@ static void __dma_phys_op(phys_addr_t paddr, size_t size, enum dma_cache_op op)
> #endif
> }
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> __dma_phys_op(paddr, size, DMA_CACHE_CLEAN);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> - switch (direction) {
> - case DMA_NONE:
> - BUG();
> - case DMA_TO_DEVICE:
> - break;
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - __dma_phys_op(start, end, DMA_CACHE_INVAL);
> - break;
> - }
> + __dma_phys_op(paddr, size, DMA_CACHE_INVAL);
> }
>
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + __dma_phys_op(paddr, size, DMA_CACHE_FLUSH);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return true;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> +}
> +
> +#include <linux/dma-sync.h>
> +
> void arch_dma_prep_coherent(struct page *page, size_t size)
> {
> unsigned long kaddr = (unsigned long)page_address(page);
> diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
> index 69c80b2155a1..b9a9f57e02be 100644
> --- a/arch/riscv/mm/dma-noncoherent.c
> +++ b/arch/riscv/mm/dma-noncoherent.c
> @@ -12,43 +12,40 @@
>
> static bool noncoherent_supported;
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> void *vaddr = phys_to_virt(paddr);
>
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> - break;
> - case DMA_FROM_DEVICE:
> - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> - break;
> - case DMA_BIDIRECTIONAL:
> - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> - break;
> - default:
> - break;
> - }
> + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> }
>
> -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> {
> void *vaddr = phys_to_virt(paddr);
>
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - break;
> - case DMA_FROM_DEVICE:
> - case DMA_BIDIRECTIONAL:
> - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
> - break;
> - default:
> - break;
> - }
> + ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
> }
>
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + void *vaddr = phys_to_virt(paddr);
> +
> + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return true;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return true;
> +}
> +
> +#include <linux/dma-sync.h>
> +
> +
> void arch_dma_prep_coherent(struct page *page, size_t size)
> {
> void *flush_addr = page_address(page);
> diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c
> index 6a44c0e7ba40..41f031ae7609 100644
> --- a/arch/sh/kernel/dma-coherent.c
> +++ b/arch/sh/kernel/dma-coherent.c
> @@ -12,22 +12,35 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
> __flush_purge_region(page_address(page), size);
> }
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
>
> - switch (dir) {
> - case DMA_FROM_DEVICE: /* invalidate only */
> - __flush_invalidate_region(addr, size);
> - break;
> - case DMA_TO_DEVICE: /* writeback only */
> - __flush_wback_region(addr, size);
> - break;
> - case DMA_BIDIRECTIONAL: /* writeback and invalidate */
> - __flush_purge_region(addr, size);
> - break;
> - default:
> - BUG();
> - }
> + __flush_wback_region(addr, size);
> }
> +
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
> +
> + __flush_invalidate_region(addr, size);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
> +
> + __flush_purge_region(addr, size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return false;
> +}
> +
> +#include <linux/dma-sync.h>
> diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
> index 4f3d26066ec2..6926ead2f208 100644
> --- a/arch/sparc/kernel/ioport.c
> +++ b/arch/sparc/kernel/ioport.c
> @@ -300,21 +300,39 @@ arch_initcall(sparc_register_ioport);
>
> #endif /* CONFIG_SBUS */
>
> -/*
> - * IIep is write-through, not flushing on cpu to device transfer.
> - *
> - * On LEON systems without cache snooping, the entire D-CACHE must be flushed to
> - * make DMA to cacheable memory coherent.
> - */
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - if (dir != DMA_TO_DEVICE &&
> - sparc_cpu_model == sparc_leon &&
> +	/* IIep is write-through, so no flush is needed on CPU-to-device transfers. */
> +}
> +
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + /*
> + * On LEON systems without cache snooping, the entire D-CACHE must be
> + * flushed to make DMA to cacheable memory coherent.
> + */
> + if (sparc_cpu_model == sparc_leon &&
> !sparc_leon3_snooping_enabled())
> leon_flush_dcache_all();
> }
>
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + arch_dma_cache_inv(paddr, size);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return true;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return false;
> +}
> +
> +#include <linux/dma-sync.h>
> +
> #ifdef CONFIG_PROC_FS
>
> static int sparc_io_proc_show(struct seq_file *m, void *v)
> diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
> index ff3bf015eca4..d4ff96585545 100644
> --- a/arch/xtensa/kernel/pci-dma.c
> +++ b/arch/xtensa/kernel/pci-dma.c
> @@ -43,24 +43,34 @@ static void do_cache_op(phys_addr_t paddr, size_t size,
> }
> }
>
> -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> - enum dma_data_direction dir)
> +static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
> {
> - switch (dir) {
> - case DMA_TO_DEVICE:
> - do_cache_op(paddr, size, __flush_dcache_range);
> - break;
> - case DMA_FROM_DEVICE:
> - do_cache_op(paddr, size, __invalidate_dcache_range);
> - break;
> - case DMA_BIDIRECTIONAL:
> - do_cache_op(paddr, size, __flush_invalidate_dcache_range);
> - break;
> - default:
> - break;
> - }
> + do_cache_op(paddr, size, __flush_dcache_range);
> }
>
> +static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
> +{
> + do_cache_op(paddr, size, __invalidate_dcache_range);
> +}
> +
> +static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
> +{
> + do_cache_op(paddr, size, __flush_invalidate_dcache_range);
> +}
> +
> +static inline bool arch_sync_dma_clean_before_fromdevice(void)
> +{
> + return false;
> +}
> +
> +static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
> +{
> + return false;
> +}
> +
> +#include <linux/dma-sync.h>
> +
> +
> void arch_dma_prep_coherent(struct page *page, size_t size)
> {
> __invalidate_dcache_range((unsigned long)page_address(page), size);
> diff --git a/include/linux/dma-sync.h b/include/linux/dma-sync.h
> new file mode 100644
> index 000000000000..18e33d5e8eaf
> --- /dev/null
> +++ b/include/linux/dma-sync.h
> @@ -0,0 +1,107 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Cache operations depending on function and direction argument, inspired by
> + * https://lore.kernel.org/lkml/20180518175004.GF17671@xxxxxxxxxxxxxxxxxxxxx
> + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
> + * dma-mapping: provide a generic dma-noncoherent implementation)"
> + *
> + * | map == for_device | unmap == for_cpu
> + * |----------------------------------------------------------------
> + * TO_DEV | writeback writeback | none none
> + * FROM_DEV | invalidate invalidate | invalidate* invalidate*
> + * BIDIR | writeback writeback | invalidate invalidate
> + *
> + * [*] needed for CPU speculative prefetches
> + *
> + * NOTE: we don't check the validity of the direction argument, as that is
> + * done in the upper-layer functions (in include/linux/dma-mapping.h)
> + *
> + * This file can be included by arch/.../kernel/dma-noncoherent.c to provide
> + * the respective high-level operations without having to expose the
> + * cache management ops to drivers.
> + */
> +
> +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
> + enum dma_data_direction dir)
> +{
> + switch (dir) {
> + case DMA_TO_DEVICE:
> +		/*
> +		 * This may be an empty function on write-through caches,
> +		 * and it might invalidate the cache if an architecture has
> +		 * a write-back cache but no way to write it back without
> +		 * invalidating it.
> +		 */
> + arch_dma_cache_wback(paddr, size);
> + break;
> +
> + case DMA_FROM_DEVICE:
> + /*
> + * FIXME: this should be handled the same across all
> + * architectures, see
> + * https://lore.kernel.org/all/20220606152150.GA31568@willie-the-truck/
> + */
> + if (!arch_sync_dma_clean_before_fromdevice()) {
> + arch_dma_cache_inv(paddr, size);
> + break;
> + }
> + fallthrough;
> +
> + case DMA_BIDIRECTIONAL:
> + /* Skip the invalidate here if it's done later */
> + if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
> + arch_sync_dma_cpu_needs_post_dma_flush())
> + arch_dma_cache_wback(paddr, size);
> + else
> + arch_dma_cache_wback_inv(paddr, size);
> + break;
> +
> + default:
> + break;
> + }
> +}
> +
> +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
> +/*
> + * Mark the D-cache clean for these pages to avoid extra flushing.
> + */
> +static void arch_dma_mark_dcache_clean(phys_addr_t paddr, size_t size)
> +{
> +#ifdef CONFIG_ARCH_DMA_MARK_DCACHE_CLEAN
> + unsigned long pfn = PFN_UP(paddr);
> + unsigned long off = paddr & (PAGE_SIZE - 1);
> + size_t left = size;
> +
> + if (off)
> + left -= PAGE_SIZE - off;
> +
> + while (left >= PAGE_SIZE) {
> + struct page *page = pfn_to_page(pfn++);
> + set_bit(PG_dcache_clean, &page->flags);
> + left -= PAGE_SIZE;
> + }
> +#endif
> +}
> +
> +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
> + enum dma_data_direction dir)
> +{
> + switch (dir) {
> + case DMA_TO_DEVICE:
> + break;
> +
> + case DMA_FROM_DEVICE:
> + case DMA_BIDIRECTIONAL:
> +		/* The FROM_DEVICE invalidate is needed only on CPUs that speculatively prefetch */
> + if (arch_sync_dma_cpu_needs_post_dma_flush())
> + arch_dma_cache_inv(paddr, size);
> +
> + if (size > PAGE_SIZE)
> + arch_dma_mark_dcache_clean(paddr, size);
> + break;
> +
> + default:
> + break;
> + }
> +}
> +#endif
> --
> 2.39.2
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@xxxxxxxxxxxxxxxxxxx
> http://lists.infradead.org/mailman/listinfo/linux-riscv