Re: [PATCH v7 14/23] iommu/smmuv3: Implement cache_invalidate

From: Robin Murphy
Date: Mon May 13 2019 - 10:02:39 EST


On 13/05/2019 13:16, Auger Eric wrote:
Hi Robin,
On 5/8/19 5:01 PM, Robin Murphy wrote:
On 08/04/2019 13:19, Eric Auger wrote:
Implement domain-selective and page-selective IOTLB invalidations.

Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>

---
v6 -> v7
- check the uapi version

v3 -> v4:
- adapt to changes in the uapi
- add support for leaf parameter
- do not use arm_smmu_tlb_inv_range_nosync or arm_smmu_tlb_inv_context
  anymore

v2 -> v3:
- replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync

v1 -> v2:
- properly pass the asid
---
 drivers/iommu/arm-smmu-v3.c | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1486baf53425..4366921d8318 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2326,6 +2326,65 @@ static void arm_smmu_detach_pasid_table(struct iommu_domain *domain)
 	mutex_unlock(&smmu_domain->init_mutex);
 }
 +static int
 +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
 +			  struct iommu_cache_invalidate_info *inv_info)
 +{
 +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 +	struct arm_smmu_device *smmu = smmu_domain->smmu;
 +
 +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
 +		return -EINVAL;
 +
 +	if (!smmu)
 +		return -EINVAL;
 +
 +	if (inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
 +		return -EINVAL;
 +
 +	if (inv_info->cache & IOMMU_CACHE_INV_TYPE_IOTLB) {
 +		if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
 +			struct arm_smmu_cmdq_ent cmd = {
 +				.opcode = CMDQ_OP_TLBI_NH_ASID,
 +				.tlbi = {
 +					.vmid = smmu_domain->s2_cfg.vmid,
 +					.asid = inv_info->pasid,
 +				},
 +			};
 +
 +			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 +			arm_smmu_cmdq_issue_sync(smmu);

I'd much rather make arm_smmu_tlb_inv_context() understand nested
domains than open-code commands all over the place.



 +
 +		} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
 +			struct iommu_inv_addr_info *info = &inv_info->addr_info;
 +			size_t size = info->nb_granules * info->granule_size;
 +			bool leaf = info->flags & IOMMU_INV_ADDR_FLAGS_LEAF;
 +			struct arm_smmu_cmdq_ent cmd = {
 +				.opcode = CMDQ_OP_TLBI_NH_VA,
 +				.tlbi = {
 +					.addr = info->addr,
 +					.vmid = smmu_domain->s2_cfg.vmid,
 +					.asid = info->pasid,
 +					.leaf = leaf,
 +				},
 +			};
 +
 +			do {
 +				arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 +				cmd.tlbi.addr += info->granule_size;
 +			} while (size -= info->granule_size);
 +			arm_smmu_cmdq_issue_sync(smmu);

And this in particular I would really like to go all the way through
io_pgtable_tlb_add_flush()/io_pgtable_sync() if at all possible. Hooking
up range-based invalidations is going to be a massive headache if the
abstraction isn't solid.

The concern is the host does not "own" the s1 config asid
(smmu_domain->s1_cfg.cd.asid is not set, practically). In our case the
asid only is passed by the userspace on CACHE_INVALIDATE ioctl call.

arm_smmu_tlb_inv_context and arm_smmu_tlb_inv_range_nosync use this field

Right, but that's not exactly hard to solve. Even just something like the (untested, purely illustrative) refactoring below would be beneficial.

Robin.

----->8-----
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d3880010c6cf..31ef703cf671 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1423,11 +1423,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
arm_smmu_cmdq_issue_sync(smmu);
}

-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void __arm_smmu_tlb_inv_range(struct arm_smmu_domain *smmu_domain, u16 asid,
+ unsigned long iova, size_t size, size_t granule, bool leaf)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
@@ -1437,18 +1435,27 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,

if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
+ cmd.tlbi.asid = asid;
} else {
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}

do {
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd(smmu_domain->smmu, &cmd);
cmd.tlbi.addr += granule;
} while (size -= granule);
}

+static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+ size_t granule, bool leaf, void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+
+ __arm_smmu_tlb_inv_range(smmu_domain, smmu_domain->s1_cfg.cd.asid, iova,
+ size, granule, leaf);
+}
+
static const struct iommu_gather_ops arm_smmu_gather_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context,
.tlb_add_flush = arm_smmu_tlb_inv_range_nosync,