Re: [PATCH v2 7/7] iommu/riscv: Paging domain support

From: Baolu Lu
Date: Mon Apr 22 2024 - 01:22:46 EST


On 4/19/24 12:32 AM, Tomasz Jeznach wrote:
Introduce first-stage address translation support.

The page table configured by the IOMMU driver will use the same format
as the CPU’s MMU, and will fall back to identity translation if the
page table format configured for the MMU is not supported by the
IOMMU hardware.

This change introduces the IOTINVAL.VMA command, required to invalidate
any cached IOATC entries after a mapping is updated and/or removed from
the paging domain. Invalidations for the non-leaf page entries will
be added to the driver code in a separate patch series, following a spec
update to clarify the non-leaf cache invalidation command. With this patch
allowing only 4K mappings and keeping non-leaf page entries in memory,
this should be a reasonable simplification.

Signed-off-by: Tomasz Jeznach <tjeznach@xxxxxxxxxxxx>
---
drivers/iommu/riscv/Kconfig | 1 +
drivers/iommu/riscv/iommu.c | 467 +++++++++++++++++++++++++++++++++++-
2 files changed, 466 insertions(+), 2 deletions(-)


[...]

+
static int riscv_iommu_attach_domain(struct riscv_iommu_device *iommu,
struct device *dev,
struct iommu_domain *iommu_domain)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct riscv_iommu_domain *domain;
struct riscv_iommu_dc *dc;
+ struct riscv_iommu_bond *bond = NULL, *b;
+ struct riscv_iommu_command cmd;
u64 fsc, ta, tc;
int i;
@@ -769,6 +838,20 @@ static int riscv_iommu_attach_domain(struct riscv_iommu_device *iommu,
ta = 0;
tc = RISCV_IOMMU_DC_TC_V;
fsc = FIELD_PREP(RISCV_IOMMU_DC_FSC_MODE, RISCV_IOMMU_DC_FSC_MODE_BARE);
+ } else if (iommu_domain->type & __IOMMU_DOMAIN_PAGING) {
+ domain = iommu_domain_to_riscv(iommu_domain);
+
+ ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid);
+ tc = RISCV_IOMMU_DC_TC_V;
+ if (domain->amo_enabled)
+ tc |= RISCV_IOMMU_DC_TC_SADE;
+ fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
+ FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+
+ bond = kzalloc(sizeof(*bond), GFP_KERNEL);
+ if (!bond)
+ return -ENOMEM;
+ bond->dev = dev;
} else {
/* This should never happen. */
return -ENODEV;
@@ -787,12 +870,390 @@ static int riscv_iommu_attach_domain(struct riscv_iommu_device *iommu,
xchg64(&dc->ta, ta);
xchg64(&dc->tc, tc);
- /* Device context invalidation will be required. Ignoring for now. */
+ if (!(tc & RISCV_IOMMU_DC_TC_V))
+ continue;
+
+ /* Invalidate device context cache */
+ riscv_iommu_cmd_iodir_inval_ddt(&cmd);
+ riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);
+ riscv_iommu_cmd_send(iommu, &cmd, 0);
+
+ if (FIELD_GET(RISCV_IOMMU_PC_FSC_MODE, fsc) == RISCV_IOMMU_DC_FSC_MODE_BARE)
+ continue;
+
+ /* Invalidate last valid PSCID */
+ riscv_iommu_cmd_inval_vma(&cmd);
+ riscv_iommu_cmd_inval_set_pscid(&cmd, FIELD_GET(RISCV_IOMMU_DC_TA_PSCID, ta));
+ riscv_iommu_cmd_send(iommu, &cmd, 0);
+ }
+
+ /* Synchronize directory update */
+ riscv_iommu_cmd_iofence(&cmd);
+ riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_IOTINVAL_TIMEOUT);
+
+ /* Track domain to devices mapping. */
+ if (bond)
+ list_add_rcu(&bond->list, &domain->bonds);
+
+ /* Remove tracking from previous domain, if needed. */
+ iommu_domain = iommu_get_domain_for_dev(dev);

Calling iommu_get_domain_for_dev() in the domain attach path is very
fragile, because it depends heavily on the order in which the core calls
the attach callback and sets the domain pointer for the device.

Perhaps the driver can use dev_iommu_priv_set/get() to keep the active
domain in the per-device private data?

+ if (iommu_domain && !!(iommu_domain->type & __IOMMU_DOMAIN_PAGING)) {
+ domain = iommu_domain_to_riscv(iommu_domain);
+ bond = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(b, &domain->bonds, list) {
+ if (b->dev == dev) {
+ bond = b;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (bond) {
+ list_del_rcu(&bond->list);
+ kfree_rcu(bond, rcu);
+ }
+ }
+
+ return 0;
+}

Best regards,
baolu