Re: use generic DMA mapping code in powerpc V4

From: Christian Zigotzky
Date: Fri Nov 30 2018 - 07:24:04 EST


Hi Christoph,

Thanks a lot for your fast reply.

On 30 November 2018 at 11:53AM, Christoph Hellwig wrote:
Hi Christian,

for such a diverse architecture like powerpc we'll have to rely on
users / non core developers like you to help with testing.
I see. I will help as well as I can.

Can you try the patch below for the cyrus config?
Yes, of course. I patched your Git kernel and after that I compiled it again. U-Boot loads the kernel and the dtb file. Then the kernel starts but it doesn't find any hard disks (partitions).

@All
Could you please also test Christoph's kernel on your PASEMI and NXP boards? Download: 'git clone git://git.infradead.org/users/hch/misc.git -b powerpc-dma.4 a'
*PLEASE*

For the nemo one I have no idea yet,
We had some problems with the PASEMI ethernet and DMA two years ago. I had to deactivate the option PASEMI_IOMMU_DMA_FORCE.

commit 416f37d0816b powerpc/pasemi: Fix coherent_dma_mask for dma engine:

Commit 817820b0 ("powerpc/iommu: Support "hybrid" iommu/direct DMA
ops for coherent_mask < dma_mask") adds a check of coherent_dma_mask for
dma allocations.

Unfortunately current PASemi code does not set this value for the DMA
engine, which ends up with the default value of 0xffffffff, the result
is on a PASemi system with >2Gb ram and iommu enabled the onboard
ethernet stops working due to an inability to allocate memory. Add an
initialisation to pci_dma_dev_setup_pasemi().
Signed-off-by: Darren Stevens <darren@xxxxxxxxxxxxxxxx>
Signed-off-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx>

Links:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-July/146701.html
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=416f37d0816b9720b8227953e55954d81456f991

FYI: DMA handling was rewritten in 2015. We had some problems with the new DMA code at that time, and I had to revert the commit '[RFC/PATCH,v2] powerpc/iommu: Support "hybrid" iommu/direct DMA ops for coherent_mask < dma_mask'.

Link: https://patchwork.ozlabs.org/patch/472535/

I had to create a patch in 2015:

    diff -rupN linux-4.4/arch/powerpc/Kconfig linux-4.4-nemo/arch/powerpc/Kconfig
    --- linux-4.4/arch/powerpc/Kconfig    2015-12-07 00:43:12.000000000 +0100
    +++ linux-4.4-nemo/arch/powerpc/Kconfig    2015-12-07 14:48:23.371987988 +0100
    @@ -158,8 +155,6 @@ config PPC
         select HAVE_PERF_EVENTS_NMI if PPC64
         select EDAC_SUPPORT
         select EDAC_ATOMIC_SCRUB
    -    select ARCH_HAS_DMA_SET_COHERENT_MASK
    -    select HAVE_ARCH_SECCOMP_FILTER

     config GENERIC_CSUM
         def_bool CPU_LITTLE_ENDIAN
    @@ -419,8 +414,7 @@ config PPC64_SUPPORTS_MEMORY_FAILURE

     config KEXEC
         bool "kexec system call"
    -    depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) || PPC_BOOK3E
    -    select KEXEC_CORE
    +    depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot

    diff -rupN linux-4.4/arch/powerpc/kernel/dma.c linux-4.4-nemo/arch/powerpc/kernel/dma.c
    --- linux-4.4/arch/powerpc/kernel/dma.c    2015-12-07 00:43:12.000000000 +0100
    +++ linux-4.4-nemo/arch/powerpc/kernel/dma.c    2015-12-07 14:49:38.098286892 +0100
    @@ -40,31 +39,9 @@ static u64 __maybe_unused get_pfn_limit(
         return pfn;
     }

    -static int dma_direct_dma_supported(struct device *dev, u64 mask)
    -{
    -#ifdef CONFIG_PPC64
    -    u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
    -
    -    /* Limit fits in the mask, we are good */
    -    if (mask >= limit)
    -        return 1;
    -
    -#ifdef CONFIG_FSL_SOC
    -    /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
    -     * that will have to be refined if/when they support iommus
    -     */
    -    return 1;
    -#endif
    -    /* Sorry ... */
    -    return 0;
    -#else
    -    return 1;
    -#endif
    -}
    -
    -void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
    -                  dma_addr_t *dma_handle, gfp_t flag,
    -                  struct dma_attrs *attrs)
    +void *dma_direct_alloc_coherent(struct device *dev, size_t size,
    +                dma_addr_t *dma_handle, gfp_t flag,
    +                struct dma_attrs *attrs)
     {
         void *ret;
     #ifdef CONFIG_NOT_COHERENT_CACHE
    @@ -119,9 +96,9 @@ void *__dma_direct_alloc_coherent(struct
     #endif
     }

    -void __dma_direct_free_coherent(struct device *dev, size_t size,
    -                void *vaddr, dma_addr_t dma_handle,
    -                struct dma_attrs *attrs)
    +void dma_direct_free_coherent(struct device *dev, size_t size,
    +                  void *vaddr, dma_addr_t dma_handle,
    +                  struct dma_attrs *attrs)
     {
     #ifdef CONFIG_NOT_COHERENT_CACHE
         __dma_free_coherent(size, vaddr);
    @@ -130,51 +107,6 @@ void __dma_direct_free_coherent(struct d
     #endif
     }

    -static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
    -                       dma_addr_t *dma_handle, gfp_t flag,
    -                       struct dma_attrs *attrs)
    -{
    -    struct iommu_table *iommu;
    -
    -    /* The coherent mask may be smaller than the real mask, check if
    -     * we can really use the direct ops
    -     */
    -    if (dma_direct_dma_supported(dev, dev->coherent_dma_mask))
    -        return __dma_direct_alloc_coherent(dev, size, dma_handle,
    -                           flag, attrs);
    -
    -    /* Ok we can't ... do we have an iommu ? If not, fail */
    -    iommu = get_iommu_table_base(dev);
    -    if (!iommu)
    -        return NULL;
    -
    -    /* Try to use the iommu */
    -    return iommu_alloc_coherent(dev, iommu, size, dma_handle,
    -                    dev->coherent_dma_mask, flag,
    -                    dev_to_node(dev));
    -}
    -
    -static void dma_direct_free_coherent(struct device *dev, size_t size,
    -                     void *vaddr, dma_addr_t dma_handle,
    -                     struct dma_attrs *attrs)
    -{
    -    struct iommu_table *iommu;
    -
    -    /* See comments in dma_direct_alloc_coherent() */
    -    if (dma_direct_dma_supported(dev, dev->coherent_dma_mask))
    -        return __dma_direct_free_coherent(dev, size, vaddr, dma_handle,
    -                          attrs);
    -    /* Maybe we used an iommu ... */
    -    iommu = get_iommu_table_base(dev);
    -
    -    /* If we hit that we should have never allocated in the first
    -     * place so how come we are freeing ?
    -     */
    -    if (WARN_ON(!iommu))
    -        return;
    -    iommu_free_coherent(iommu, size, vaddr, dma_handle);
    -}
    -
     int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
                      void *cpu_addr, dma_addr_t handle, size_t size,
                      struct dma_attrs *attrs)
    @@ -215,6 +147,18 @@ static void dma_direct_unmap_sg(struct d
     {
     }

    +static int dma_direct_dma_supported(struct device *dev, u64 mask)
    +{
    +#ifdef CONFIG_PPC64
    +    /* Could be improved so platforms can set the limit in case
    +     * they have limited DMA windows
    +     */
    +    return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
    +#else
    +    return 1;
    +#endif
    +}
    +
     static u64 dma_direct_get_required_mask(struct device *dev)
     {
         u64 end, mask;
    @@ -286,25 +230,6 @@ struct dma_map_ops dma_direct_ops = {
     };
     EXPORT_SYMBOL(dma_direct_ops);

    -int dma_set_coherent_mask(struct device *dev, u64 mask)
    -{
    -    if (!dma_supported(dev, mask)) {
    -        /*
    -         * We need to special case the direct DMA ops which can
    -         * support a fallback for coherent allocations. There
    -         * is no dma_op->set_coherent_mask() so we have to do
    -         * things the hard way:
    -         */
    -        if (get_dma_ops(dev) != &dma_direct_ops ||
    -            get_iommu_table_base(dev) == NULL ||
    -            !dma_iommu_dma_supported(dev, mask))
    -            return -EIO;
    -    }
    -    dev->coherent_dma_mask = mask;
    -    return 0;
    -}
    -EXPORT_SYMBOL(dma_set_coherent_mask);
    -
     #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)

     int __dma_set_mask(struct device *dev, u64 dma_mask)

Interesting PASEMI ethernet files:

arch/powerpc/platforms/pasemi/iommu.c
drivers/net/ethernet/pasemi/pasemi_mac.c
drivers/net/ethernet/pasemi/pasemi_mac.h
drivers/net/ethernet/pasemi/pasemi_mac_ethtool.c
drivers/net/ethernet/pasemi/Makefile
drivers/net/ethernet/pasemi/Kconfig

I know this is a lot of information but I hope it helps.

Thanks,
Christian