[PATCH v10 04/13] iommu/arm-smmu-qcom: Add implementation for the adreno GPU SMMU

From: Jordan Crouse
Date: Mon Jul 20 2020 - 12:45:02 EST


Add a special implementation for the SMMU attached to most Adreno GPU
target triggered from the qcom,adreno-smmu compatible string.

The new Adreno SMMU implementation will enable split pagetables
(TTBR1) for the domain attached to the GPU device (SID 0) and
hard code it context bank 0 so the GPU hardware can implement
per-instance pagetables.

Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx>
---

drivers/iommu/arm-smmu-impl.c | 3 ++
drivers/iommu/arm-smmu-qcom.c | 83 ++++++++++++++++++++++++++++++++++-
drivers/iommu/arm-smmu.h | 1 +
3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index b71b14685cc9..3bb1ef4e85f7 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -176,5 +176,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
of_device_is_compatible(np, "qcom,sc7180-smmu-500"))
return qcom_smmu_impl_init(smmu);

+ if (of_device_is_compatible(smmu->dev->of_node, "qcom,adreno-smmu"))
+ return qcom_adreno_smmu_impl_init(smmu);
+
return smmu;
}
diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
index be4318044f96..b9a5c5369e86 100644
--- a/drivers/iommu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm-smmu-qcom.c
@@ -12,6 +12,67 @@ struct qcom_smmu {
struct arm_smmu_device smmu;
};

+#define QCOM_ADRENO_SMMU_GPU_SID 0
+
+static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
+ int idx, i;
+
+ /*
+ * The GPU will always use SID 0 so that is a handy way to uniquely
+ * identify it and configure it for per-instance pagetables
+ */
+ for_each_cfg_sme(cfg, fwspec, i, idx) {
+ u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
+
+ if (sid == QCOM_ADRENO_SMMU_GPU_SID)
+ return true;
+ }
+
+ return false;
+}
+
+static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
+ struct device *dev, int start, int count)
+{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+ /*
+ * Assign context bank 0 to the GPU device so the GPU hardware can
+ * switch pagetables
+ */
+ if (qcom_adreno_smmu_is_gpu_device(dev)) {
+ if (start > 0 || test_bit(0, smmu->context_map))
+ return -ENOSPC;
+
+ set_bit(0, smmu->context_map);
+ return 0;
+ }
+
+ return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
+}
+
+static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
+{
+ /* Only enable split pagetables for the GPU device (SID 0) */
+ if (!qcom_adreno_smmu_is_gpu_device(dev))
+ return 0;
+
+ /*
+ * All targets that use the qcom,adreno-smmu compatible string *should*
+ * be AARCH64 stage 1 but double check because the arm-smmu code assumes
+ * that is the case when the TTBR1 quirk is enabled
+ */
+ if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
+ (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64))
+ pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
+
+ return 0;
+}
+
static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,adreno" },
{ .compatible = "qcom,mdp4" },
@@ -65,7 +126,15 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
.reset = qcom_smmu500_reset,
};

-struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
+static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
+ .init_context = qcom_adreno_smmu_init_context,
+ .def_domain_type = qcom_smmu_def_domain_type,
+ .reset = qcom_smmu500_reset,
+ .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
+};
+
+static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
+ const struct arm_smmu_impl *impl)
{
struct qcom_smmu *qsmmu;

@@ -75,8 +144,18 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)

qsmmu->smmu = *smmu;

- qsmmu->smmu.impl = &qcom_smmu_impl;
+ qsmmu->smmu.impl = impl;
devm_kfree(smmu->dev, smmu);

return &qsmmu->smmu;
}
+
+struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ return qcom_smmu_create(smmu, &qcom_smmu_impl);
+}
+
+struct arm_smmu_device *qcom_adreno_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
+}
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index d10d745a0290..9f81c1fffe1e 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -491,6 +491,7 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,

struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu);
+struct arm_smmu_device *qcom_adreno_smmu_impl_init(struct arm_smmu_device *smmu);

int arm_mmu500_reset(struct arm_smmu_device *smmu);

--
2.25.1