[PATCH v3 5/6] iommu/rockchip: use DMA API to map, to flush cache

From: Shunqian Zheng
Date: Wed Jun 15 2016 - 08:08:30 EST


Use DMA API instead of architecture internal functions like
__cpuc_flush_dcache_area() etc.

To support the virtual device like DRM the virtual slave iommu
added in the previous patch, attaching to which the DRM can use
it own domain->dev for dma_map_*(), dma_sync_*() even VOP is disabled.

With this patch, this driver is available for ARM64 like RK3399.

Signed-off-by: Shunqian Zheng <zhengsq@xxxxxxxxxxxxxx>
---
drivers/iommu/rockchip-iommu.c | 113 +++++++++++++++++++++++++++--------------
1 file changed, 76 insertions(+), 37 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 82ecc99..b60b29e 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -4,8 +4,6 @@
* published by the Free Software Foundation.
*/

-#include <asm/cacheflush.h>
-#include <asm/pgtable.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/device.h>
@@ -78,7 +76,9 @@

struct rk_iommu_domain {
struct list_head iommus;
+ struct device *dev;
u32 *dt; /* page directory table */
+ dma_addr_t dt_dma;
spinlock_t iommus_lock; /* lock for iommus list */
spinlock_t dt_lock; /* lock for modifying page directory table */

@@ -102,14 +102,12 @@ static inline bool rk_iommu_is_virtual(struct rk_iommu *iommu)
return iommu->num_mmu == 0;
}

-static inline void rk_table_flush(u32 *va, unsigned int count)
+static inline void rk_table_flush(struct device *dev, dma_addr_t dma,
+ unsigned int count)
{
- phys_addr_t pa_start = virt_to_phys(va);
- phys_addr_t pa_end = virt_to_phys(va + count);
- size_t size = pa_end - pa_start;
+ size_t size = count * 4; /* count of entry, 4 bytes per entry */

- __cpuc_flush_dcache_area(va, size);
- outer_flush_range(pa_start, pa_end);
+ dma_sync_single_for_device(dev, dma, size, DMA_TO_DEVICE);
}

static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -192,10 +190,9 @@ static inline bool rk_dte_is_pt_valid(u32 dte)
return dte & RK_DTE_PT_VALID;
}

-static u32 rk_mk_dte(u32 *pt)
+static inline u32 rk_mk_dte(dma_addr_t pt_dma)
{
- phys_addr_t pt_phys = virt_to_phys(pt);
- return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
+ return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
}

/*
@@ -613,12 +610,14 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
dma_addr_t iova)
{
u32 *page_table, *dte_addr;
- u32 dte;
+ u32 dte_index, dte;
phys_addr_t pt_phys;
+ dma_addr_t pt_dma;

assert_spin_locked(&rk_domain->dt_lock);

- dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)];
+ dte_index = rk_iova_dte_index(iova);
+ dte_addr = &rk_domain->dt[dte_index];
dte = *dte_addr;
if (rk_dte_is_pt_valid(dte))
goto done;
@@ -627,19 +626,28 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
if (!page_table)
return ERR_PTR(-ENOMEM);

- dte = rk_mk_dte(page_table);
- *dte_addr = dte;
+ pt_dma = dma_map_single(rk_domain->dev, page_table,
+ SPAGE_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(rk_domain->dev, pt_dma)) {
+ dev_err(rk_domain->dev,
+ "DMA mapping error while allocating page table\n");
+ free_page((unsigned long)page_table);
+ return ERR_PTR(-ENOMEM);
+ }

- rk_table_flush(page_table, NUM_PT_ENTRIES);
- rk_table_flush(dte_addr, 1);
+ dte = rk_mk_dte(pt_dma);
+ *dte_addr = dte;

+ rk_table_flush(rk_domain->dev, pt_dma, NUM_PT_ENTRIES);
+ rk_table_flush(rk_domain->dev, rk_domain->dt_dma + dte_index * 4, 1);
done:
pt_phys = rk_dte_pt_address(dte);
return (u32 *)phys_to_virt(pt_phys);
}

static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
- u32 *pte_addr, dma_addr_t iova, size_t size)
+ u32 *pte_addr, dma_addr_t pte_dma,
+ size_t size)
{
unsigned int pte_count;
unsigned int pte_total = size / SPAGE_SIZE;
@@ -654,14 +662,14 @@ static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
pte_addr[pte_count] = rk_mk_pte_invalid(pte);
}

- rk_table_flush(pte_addr, pte_count);
+ rk_table_flush(rk_domain->dev, pte_dma, pte_count);

return pte_count * SPAGE_SIZE;
}

static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
- dma_addr_t iova, phys_addr_t paddr, size_t size,
- int prot)
+ dma_addr_t pte_dma, dma_addr_t iova,
+ phys_addr_t paddr, size_t size, int prot)
{
unsigned int pte_count;
unsigned int pte_total = size / SPAGE_SIZE;
@@ -680,7 +688,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
paddr += SPAGE_SIZE;
}

- rk_table_flush(pte_addr, pte_count);
+ rk_table_flush(rk_domain->dev, pte_dma, pte_total);

/*
* Zap the first and last iova to evict from iotlb any previously
@@ -693,7 +701,8 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
return 0;
unwind:
/* Unmap the range of iovas that we just mapped */
- rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE);
+ rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma,
+ pte_count * SPAGE_SIZE);

iova += pte_count * SPAGE_SIZE;
page_phys = rk_pte_page_address(pte_addr[pte_count]);
@@ -708,8 +717,9 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
- dma_addr_t iova = (dma_addr_t)_iova;
+ dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
u32 *page_table, *pte_addr;
+ u32 dte_index, pte_index;
int ret;

spin_lock_irqsave(&rk_domain->dt_lock, flags);
@@ -727,8 +737,13 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
return PTR_ERR(page_table);
}

- pte_addr = &page_table[rk_iova_pte_index(iova)];
- ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot);
+ dte_index = rk_domain->dt[rk_iova_dte_index(iova)];
+ pte_index = rk_iova_pte_index(iova);
+ pte_addr = &page_table[pte_index];
+ pte_dma = rk_dte_pt_address(dte_index) + pte_index * 4;
+ ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
+ paddr, size, prot);
+
spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

return ret;
@@ -739,7 +754,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
- dma_addr_t iova = (dma_addr_t)_iova;
+ dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
phys_addr_t pt_phys;
u32 dte;
u32 *pte_addr;
@@ -763,7 +778,8 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,

pt_phys = rk_dte_pt_address(dte);
pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
- unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size);
+ pte_dma = pt_phys + rk_iova_pte_index(iova) * 4;
+ unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size);

spin_unlock_irqrestore(&rk_domain->dt_lock, flags);

@@ -875,7 +891,6 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
int ret, i;
- phys_addr_t dte_addr;

iommu = rk_iommu_from_dev(dev);

@@ -886,6 +901,22 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
return -ENOMEM;
}

+ /* Set the domain dev to virtual one if exist */
+ if (rk_iommu_is_virtual(iommu) || !rk_domain->dev)
+ rk_domain->dev = iommu->dev;
+
+ if (!rk_domain->dt_dma) {
+ rk_domain->dt_dma = dma_map_single(rk_domain->dev,
+ rk_domain->dt, SPAGE_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(rk_domain->dev, rk_domain->dt_dma)) {
+ dev_err(rk_domain->dev, "dma map error for DT\n");
+ return -ENOMEM;
+ }
+
+ rk_table_flush(rk_domain->dev, rk_domain->dt_dma,
+ NUM_DT_ENTRIES);
+ }
+
iommu->domain = domain;
if (rk_iommu_is_virtual(iommu)) {
dev_dbg(dev, "Attach virtual device to iommu domain\n");
@@ -894,28 +925,28 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,

ret = rk_iommu_enable_stall(iommu);
if (ret)
- return ret;
+ goto unmap;

ret = rk_iommu_force_reset(iommu);
if (ret)
- return ret;
+ goto unmap;


ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq,
IRQF_SHARED, dev_name(dev), iommu);
if (ret)
- return ret;
+ goto unmap;

- dte_addr = virt_to_phys(rk_domain->dt);
for (i = 0; i < iommu->num_mmu; i++) {
- rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
+ rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
+ rk_domain->dt_dma);
rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
}

ret = rk_iommu_enable_paging(iommu);
if (ret)
- return ret;
+ goto unmap;

spin_lock_irqsave(&rk_domain->iommus_lock, flags);
list_add_tail(&iommu->node, &rk_domain->iommus);
@@ -926,6 +957,11 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
rk_iommu_disable_stall(iommu);

return 0;
+
+unmap:
+ dma_unmap_single(rk_domain->dev, rk_domain->dt_dma, SPAGE_SIZE,
+ DMA_TO_DEVICE);
+ return ret;
}

static void rk_iommu_detach_device(struct iommu_domain *domain,
@@ -987,8 +1023,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
if (!rk_domain->dt)
goto err_dt;

- rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES);
-
spin_lock_init(&rk_domain->iommus_lock);
spin_lock_init(&rk_domain->dt_lock);
INIT_LIST_HEAD(&rk_domain->iommus);
@@ -1012,10 +1046,15 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
if (rk_dte_is_pt_valid(dte)) {
phys_addr_t pt_phys = rk_dte_pt_address(dte);
u32 *page_table = phys_to_virt(pt_phys);
+ dma_unmap_single(rk_domain->dev, pt_phys,
+ SPAGE_SIZE, DMA_TO_DEVICE);
free_page((unsigned long)page_table);
}
}

+ if (!rk_domain->dt_dma)
+ dma_unmap_single(rk_domain->dev, rk_domain->dt_dma,
+ SPAGE_SIZE, DMA_TO_DEVICE);
free_page((unsigned long)rk_domain->dt);

iommu_put_dma_cookie(&rk_domain->domain);
--
1.9.1