Re: [PATCH kernel v8 07/31] vfio: powerpc/spapr: Moving pinning/unpinning to helpers

From: Alexey Kardashevskiy
Date: Wed Apr 15 2015 - 08:10:05 EST


On 04/15/2015 05:10 PM, David Gibson wrote:
On Fri, Apr 10, 2015 at 04:30:49PM +1000, Alexey Kardashevskiy wrote:
This is a pretty mechanical patch to make next patches simpler.

New tce_iommu_unuse_page() helper does put_page() now but it might skip
that after the memory registering patch is applied.

As we are here, this removes unnecessary checks for a value returned
by pfn_to_page() as it cannot possibly return NULL.

This moves tce_iommu_disable() later to let tce_iommu_clear() know if
the container has been enabled because if it has not been, then
put_page() must not be called on TCEs from the TCE table. This situation
is not yet possible but it will be after the KVM acceleration patchset is
applied.

Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>
---
Changes:
v6:
* tce_get_hva() returns hva via a pointer
---
drivers/vfio/vfio_iommu_spapr_tce.c | 68 +++++++++++++++++++++++++++----------
1 file changed, 50 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index c137bb3..ec5ee83 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -196,7 +196,6 @@ static void tce_iommu_release(void *iommu_data)
struct iommu_table *tbl = container->tbl;

WARN_ON(tbl && !tbl->it_group);
- tce_iommu_disable(container);

if (tbl) {
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
@@ -204,63 +203,96 @@ static void tce_iommu_release(void *iommu_data)
if (tbl->it_group)
tce_iommu_detach_group(iommu_data, tbl->it_group);
}
+
+ tce_iommu_disable(container);
+
mutex_destroy(&container->lock);

kfree(container);
}

+static void tce_iommu_unuse_page(struct tce_container *container,
+ unsigned long oldtce)
+{
+ struct page *page;
+
+ if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE)))
+ return;
+
+ /*
+ * VFIO cannot map/unmap when a container is not enabled so
+ * we would not need this check but KVM could map/unmap and if
+ * this happened, we must not put pages as KVM does not get them as
+ it expects memory pre-registration to do this part.
+ */
+ if (!container->enabled)
+ return;

This worries me a bit. How can whether the container is enabled now
safely tell you whether get_page() was called at some earlier point in time?


This is a leftover, I'll remove it as after the "iommu v2" patch there will be tce_iommu_unuse_page_v2().


+
+ page = pfn_to_page(__pa(oldtce) >> PAGE_SHIFT);
+
+ if (oldtce & TCE_PCI_WRITE)
+ SetPageDirty(page);
+
+ put_page(page);
+}
+
static int tce_iommu_clear(struct tce_container *container,
struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
unsigned long oldtce;
- struct page *page;

for ( ; pages; --pages, ++entry) {
oldtce = iommu_clear_tce(tbl, entry);
if (!oldtce)
continue;

- page = pfn_to_page(oldtce >> PAGE_SHIFT);
- WARN_ON(!page);
- if (page) {
- if (oldtce & TCE_PCI_WRITE)
- SetPageDirty(page);
- put_page(page);
- }
+ tce_iommu_unuse_page(container, (unsigned long) __va(oldtce));
}

return 0;
}

+static int tce_get_hva(unsigned long tce, unsigned long *hva)
+{
+ struct page *page = NULL;
+ enum dma_data_direction direction = iommu_tce_direction(tce);
+
+ if (get_user_pages_fast(tce & PAGE_MASK, 1,
+ direction != DMA_TO_DEVICE, &page) != 1)
+ return -EFAULT;
+
+ *hva = (unsigned long) page_address(page);
+
+ return 0;
+}

I'd prefer to see this called tce_iommu_use_page() for symmetry.


If I rename this one, then what would I call tce_get_hva_cached() from "vfio: powerpc/spapr: Register memory and define IOMMU v2"?



+
static long tce_iommu_build(struct tce_container *container,
struct iommu_table *tbl,
unsigned long entry, unsigned long tce, unsigned long pages)
{
long i, ret = 0;
- struct page *page = NULL;
+ struct page *page;
unsigned long hva;
enum dma_data_direction direction = iommu_tce_direction(tce);

for (i = 0; i < pages; ++i) {
- ret = get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page);
- if (unlikely(ret != 1)) {
- ret = -EFAULT;
+ ret = tce_get_hva(tce, &hva);
+ if (ret)
break;
- }

+ page = pfn_to_page(__pa(hva) >> PAGE_SHIFT);
if (!tce_page_is_contained(page, tbl->it_page_shift)) {
ret = -EPERM;
break;
}

- hva = (unsigned long) page_address(page) +
- (tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK);
+ /* Preserve offset within IOMMU page */
+ hva |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

ret = iommu_tce_build(tbl, entry + i, hva, direction);
if (ret) {
- put_page(page);
+ tce_iommu_unuse_page(container, hva);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
__func__, entry << tbl->it_page_shift,
tce, ret);



--
Alexey
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/