Re: [PATCH 01/34] powerpc: use mm zones more sensibly
From: Christoph Hellwig
Date: Thu Dec 06 2018 - 09:09:57 EST
Ben / Michael,
can we get this one queued up for 4.21 to prepare for the DMA work later
on?
On Wed, Nov 14, 2018 at 09:22:41AM +0100, Christoph Hellwig wrote:
> Powerpc has a somewhat odd usage where ZONE_DMA is used for all memory on
> common 64-bit configs, and ZONE_DMA32 is used for 31-bit schemes.
>
> Move to a scheme closer to what other architectures use (and I dare to
> say the intent of the system):
>
> - ZONE_DMA: optionally for memory < 31-bit (64-bit embedded only)
> - ZONE_NORMAL: everything addressable by the kernel
> - ZONE_HIGHMEM: memory > 32-bit for 32-bit kernels
>
> Also provide information on how ZONE_DMA is used by defining
> ARCH_ZONE_DMA_BITS.
>
> Contains various fixes from Benjamin Herrenschmidt.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
> arch/powerpc/Kconfig | 8 +---
> arch/powerpc/include/asm/page.h | 2 +
> arch/powerpc/include/asm/pgtable.h | 1 -
> arch/powerpc/kernel/dma-swiotlb.c | 6 +--
> arch/powerpc/kernel/dma.c | 7 +--
> arch/powerpc/mm/mem.c | 47 +++++++------------
> arch/powerpc/platforms/85xx/corenet_generic.c | 10 ----
> arch/powerpc/platforms/85xx/qemu_e500.c | 9 ----
> include/linux/mmzone.h | 2 +-
> 9 files changed, 25 insertions(+), 67 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 8be31261aec8..cffff3613bc1 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -374,9 +374,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
> depends on PPC_ADV_DEBUG_REGS && 44x
> default y
>
> -config ZONE_DMA32
> +config ZONE_DMA
> bool
> - default y if PPC64
> + default y if PPC_BOOK3E_64
>
> config PGTABLE_LEVELS
> int
> @@ -869,10 +869,6 @@ config ISA
> have an IBM RS/6000 or pSeries machine, say Y. If you have an
> embedded board, consult your board documentation.
>
> -config ZONE_DMA
> - bool
> - default y
> -
> config GENERIC_ISA_DMA
> bool
> depends on ISA_DMA_API
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index f6a1265face2..fc8c9ac0c6be 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -354,4 +354,6 @@ typedef struct page *pgtable_t;
> #endif /* __ASSEMBLY__ */
> #include <asm/slice.h>
>
> +#define ARCH_ZONE_DMA_BITS 31
> +
> #endif /* _ASM_POWERPC_PAGE_H */
> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
> index 9679b7519a35..8af32ce93c7f 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
>
> extern pgd_t swapper_pg_dir[];
>
> -void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
> int dma_pfn_limit_to_zone(u64 pfn_limit);
> extern void paging_init(void);
>
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
> index 5fc335f4d9cd..678811abccfc 100644
> --- a/arch/powerpc/kernel/dma-swiotlb.c
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -108,12 +108,8 @@ int __init swiotlb_setup_bus_notifier(void)
>
> void __init swiotlb_detect_4g(void)
> {
> - if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
> + if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
> ppc_swiotlb_enable = 1;
> -#ifdef CONFIG_ZONE_DMA32
> - limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
> -#endif
> - }
> }
>
> static int __init check_swiotlb_enabled(void)
> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
> index dbfc7056d7df..6551685a4ed0 100644
> --- a/arch/powerpc/kernel/dma.c
> +++ b/arch/powerpc/kernel/dma.c
> @@ -50,7 +50,7 @@ static int dma_nommu_dma_supported(struct device *dev, u64 mask)
> return 1;
>
> #ifdef CONFIG_FSL_SOC
> - /* Freescale gets another chance via ZONE_DMA/ZONE_DMA32, however
> + /* Freescale gets another chance via ZONE_DMA, however
> * that will have to be refined if/when they support iommus
> */
> return 1;
> @@ -94,13 +94,10 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
> }
>
> switch (zone) {
> +#ifdef CONFIG_ZONE_DMA
> case ZONE_DMA:
> flag |= GFP_DMA;
> break;
> -#ifdef CONFIG_ZONE_DMA32
> - case ZONE_DMA32:
> - flag |= GFP_DMA32;
> - break;
> #endif
> };
> #endif /* CONFIG_FSL_SOC */
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 0a64fffabee1..c0b676c3a5ba 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -246,35 +246,19 @@ static int __init mark_nonram_nosave(void)
> }
> #endif
>
> -static bool zone_limits_final;
> -
> /*
> - * The memory zones past TOP_ZONE are managed by generic mm code.
> - * These should be set to zero since that's what every other
> - * architecture does.
> + * Zones usage:
> + *
> + * We set up ZONE_DMA to be 31 bits on all platforms and ZONE_NORMAL to be
> + * everything else. GFP_DMA32 page allocations automatically fall back to
> + * ZONE_DMA.
> + *
> + * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
> + * inform the generic DMA mapping code. 32-bit only devices (if not handled
> + * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
> + * otherwise served by ZONE_DMA.
> */
> -static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
> - [0 ... TOP_ZONE ] = ~0UL,
> - [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0
> -};
> -
> -/*
> - * Restrict the specified zone and all more restrictive zones
> - * to be below the specified pfn. May not be called after
> - * paging_init().
> - */
> -void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
> -{
> - int i;
> -
> - if (WARN_ON(zone_limits_final))
> - return;
> -
> - for (i = zone; i >= 0; i--) {
> - if (max_zone_pfns[i] > pfn_limit)
> - max_zone_pfns[i] = pfn_limit;
> - }
> -}
> +static unsigned long max_zone_pfns[MAX_NR_ZONES];
>
> /*
> * Find the least restrictive zone that is entirely below the
> @@ -324,11 +308,14 @@ void __init paging_init(void)
> printk(KERN_DEBUG "Memory hole size: %ldMB\n",
> (long int)((top_of_ram - total_ram) >> 20));
>
> +#ifdef CONFIG_ZONE_DMA
> + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT);
> +#endif
> + max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
> #ifdef CONFIG_HIGHMEM
> - limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
> + max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
> #endif
> - limit_zone_pfn(TOP_ZONE, top_of_ram >> PAGE_SHIFT);
> - zone_limits_final = true;
> +
> free_area_init_nodes(max_zone_pfns);
>
> mark_nonram_nosave();
> diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
> index ac191a7a1337..b0dac307bebf 100644
> --- a/arch/powerpc/platforms/85xx/corenet_generic.c
> +++ b/arch/powerpc/platforms/85xx/corenet_generic.c
> @@ -68,16 +68,6 @@ void __init corenet_gen_setup_arch(void)
>
> swiotlb_detect_4g();
>
> -#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32)
> - /*
> - * Inbound windows don't cover the full lower 4 GiB
> - * due to conflicts with PCICSRBAR and outbound windows,
> - * so limit the DMA32 zone to 2 GiB, to allow consistent
> - * allocations to succeed.
> - */
> - limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT));
> -#endif
> -
> pr_info("%s board\n", ppc_md.name);
>
> mpc85xx_qe_init();
> diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
> index b63a8548366f..27631c607f3d 100644
> --- a/arch/powerpc/platforms/85xx/qemu_e500.c
> +++ b/arch/powerpc/platforms/85xx/qemu_e500.c
> @@ -45,15 +45,6 @@ static void __init qemu_e500_setup_arch(void)
>
> fsl_pci_assign_primary();
> swiotlb_detect_4g();
> -#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32)
> - /*
> - * Inbound windows don't cover the full lower 4 GiB
> - * due to conflicts with PCICSRBAR and outbound windows,
> - * so limit the DMA32 zone to 2 GiB, to allow consistent
> - * allocations to succeed.
> - */
> - limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT));
> -#endif
> mpc85xx_smp_init();
> }
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 847705a6d0ec..e2d01ccd071d 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -314,7 +314,7 @@ enum zone_type {
> * Architecture Limit
> * ---------------------------
> * parisc, ia64, sparc <4G
> - * s390 <2G
> + * s390, powerpc <2G
> * arm Various
> * alpha Unlimited or 0-16MB.
> *
> --
> 2.19.1
>
> _______________________________________________
> iommu mailing list
> iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
---end quoted text---