Re: [PATCH kernel v9 20/32] powerpc/powernv/ioda2: Introduce pnv_pci_create_table/pnv_pci_free_table

From: David Gibson
Date: Thu Apr 30 2015 - 00:39:51 EST


On Wed, Apr 29, 2015 at 07:12:37PM +1000, Alexey Kardashevskiy wrote:
> On 04/29/2015 02:39 PM, David Gibson wrote:
> >On Sat, Apr 25, 2015 at 10:14:44PM +1000, Alexey Kardashevskiy wrote:
> >>This is a part of moving TCE table allocation into an iommu_ops
> >>callback to support multiple IOMMU groups per one VFIO container.
> >>
> >>This moves a table creation window to the file with common powernv-pci
> >>helpers as it does not do anything IODA2-specific.
> >>
> >>This adds pnv_pci_free_table() helper to release the actual TCE table.
> >>
> >>This enforces window size to be a power of two.
> >>
> >>This should cause no behavioural change.
> >>
> >>Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>
> >>Reviewed-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx>
> >>---
> >>Changes:
> >>v9:
> >>* moved helpers to the common powernv pci.c file from pci-ioda.c
> >>* moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
> >>---
> >> arch/powerpc/platforms/powernv/pci-ioda.c | 36 ++++++------------
> >> arch/powerpc/platforms/powernv/pci.c | 61 +++++++++++++++++++++++++++++++
> >> arch/powerpc/platforms/powernv/pci.h | 4 ++
> >> 3 files changed, 76 insertions(+), 25 deletions(-)
> >>
> >>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> >>index a80be34..b9b3773 100644
> >>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
> >>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> >>@@ -1307,8 +1307,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
> >> if (rc)
> >> pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
> >>
> >>- iommu_reset_table(tbl, of_node_full_name(dev->dev.of_node));
> >>- free_pages(addr, get_order(TCE32_TABLE_SIZE));
> >>+ pnv_pci_free_table(tbl);
> >> }
> >>
> >> static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> >>@@ -2039,10 +2038,7 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
> >> static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> >> struct pnv_ioda_pe *pe)
> >> {
> >>- struct page *tce_mem = NULL;
> >>- void *addr;
> >> struct iommu_table *tbl = &pe->table_group.tables[0];
> >>- unsigned int tce_table_size, end;
> >> int64_t rc;
> >>
> >> /* We shouldn't already have a 32-bit DMA associated */
> >>@@ -2053,29 +2049,20 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> >>
> >> /* The PE will reserve all possible 32-bits space */
> >> pe->tce32_seg = 0;
> >>- end = (1 << ilog2(phb->ioda.m32_pci_base));
> >>- tce_table_size = (end / 0x1000) * 8;
> >> pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
> >>- end);
> >>+ phb->ioda.m32_pci_base);
> >>
> >>- /* Allocate TCE table */
> >>- tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
> >>- get_order(tce_table_size));
> >>- if (!tce_mem) {
> >>- pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
> >>- goto fail;
> >>+ rc = pnv_pci_create_table(&pe->table_group, pe->phb->hose->node,
> >>+ 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, tbl);
> >>+ if (rc) {
> >>+ pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc);
> >>+ return;
> >> }
> >>- addr = page_address(tce_mem);
> >>- memset(addr, 0, tce_table_size);
> >>-
> >>- /* Setup iommu */
> >>- tbl->it_table_group = &pe->table_group;
> >>-
> >>- /* Setup linux iommu table */
> >>- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
> >>- IOMMU_PAGE_SHIFT_4K);
> >>
> >> tbl->it_ops = &pnv_ioda2_iommu_ops;
> >>+
> >>+ /* Setup iommu */
> >>+ tbl->it_table_group = &pe->table_group;
> >> iommu_init_table(tbl, phb->hose->node);
> >> #ifdef CONFIG_IOMMU_API
> >> pe->table_group.ops = &pnv_pci_ioda2_ops;
> >>@@ -2121,8 +2108,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> >> fail:
> >> if (pe->tce32_seg >= 0)
> >> pe->tce32_seg = -1;
> >>- if (tce_mem)
> >>- __free_pages(tce_mem, get_order(tce_table_size));
> >>+ pnv_pci_free_table(tbl);
> >> }
> >>
> >> static void pnv_ioda_setup_dma(struct pnv_phb *phb)
> >>diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> >>index e8802ac..6bcfad5 100644
> >>--- a/arch/powerpc/platforms/powernv/pci.c
> >>+++ b/arch/powerpc/platforms/powernv/pci.c
> >>@@ -20,7 +20,9 @@
> >> #include <linux/io.h>
> >> #include <linux/msi.h>
> >> #include <linux/iommu.h>
> >>+#include <linux/memblock.h>
> >>
> >>+#include <asm/mmzone.h>
> >> #include <asm/sections.h>
> >> #include <asm/io.h>
> >> #include <asm/prom.h>
> >>@@ -645,6 +647,65 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
> >> tbl->it_type = TCE_PCI;
> >> }
> >>
> >>+static __be64 *pnv_alloc_tce_table_pages(int nid, unsigned shift,
> >>+ unsigned long *tce_table_allocated)
> >
> >I'm a bit confused by the tce_table_allocated parameter. What's the
> >circumstance where more memory is requested than required, and why
> >does it matter to the caller?
>
> It does not make much sense here but it does for "powerpc/powernv: Implement
> multilevel TCE tables" - I was trying to avoid changing same lines many
> times.
>
> The idea is if multilevel table is requested, I do not really want to
> allocate the whole tree. For example, if the userspace asked for 64K table
> and 5 levels, the result will be a list of just 5 pages - last one will be
> the actual table and upper levels will have a single valud TCE entry
> pointing to next level.
>
> But I change the prototype there anyway so I'll just move this
> tce_table_allocated thing there.

Yeah, I think that's better. It is more churn, but I think the
clearer reviewability is worth it.

--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: pgpTEdgRqy35H.pgp
Description: PGP signature