[RFC][PATCH v2 12/13] iommu/arm-smmu-v3: Add support for NVIDIA CMDQ-Virtualization hw

From: Nicolin Chen
Date: Mon Aug 30 2021 - 23:09:01 EST


From: Nate Watterson <nwatterson@xxxxxxxxxx>

NVIDIA's Grace SoC has a CMDQ-Virtualization (CMDQV) hardware,
which adds multiple VCMDQ interfaces (VINTFs) to supplement the
architected SMMU_CMDQ in an effort to reduce contention.

To make use of these supplemental CMDQs in the arm-smmu-v3 driver,
this patch borrows the "implementation infrastructure" design
from the arm-smmu driver, and then adds implementation-specific
support for the ->device_reset() and ->get_cmdq() functions. Since
NVIDIA's ->get_cmdq() implementation needs to check the first
command of the cmdlist to determine whether to redirect to its
own vcmdq, this patch also adds arguments to the arm_smmu_get_cmdq()
function.

For the CMDQV driver itself, this patch only adds the essential
parts for the host kernel, in terms of virtualization use cases.
VINTF0 is reserved for host kernel use, so it is also initialized
by the driver.

Note that, for the current plan, the CMDQV driver only supports
ACPI configuration.

Signed-off-by: Nate Watterson <nwatterson@xxxxxxxxxx>
Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
---
MAINTAINERS | 2 +
drivers/iommu/arm/arm-smmu-v3/Makefile | 2 +-
.../iommu/arm/arm-smmu-v3/arm-smmu-v3-impl.c | 7 +
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 8 +
.../iommu/arm/arm-smmu-v3/nvidia-smmu-v3.c | 432 ++++++++++++++++++
6 files changed, 463 insertions(+), 3 deletions(-)
create mode 100644 drivers/iommu/arm/arm-smmu-v3/nvidia-smmu-v3.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f800abca74b0..7a2f21279d35 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18428,8 +18428,10 @@ F: drivers/i2c/busses/i2c-tegra.c
TEGRA IOMMU DRIVERS
M: Thierry Reding <thierry.reding@xxxxxxxxx>
R: Krishna Reddy <vdumpa@xxxxxxxxxx>
+R: Nicolin Chen <nicoleotsuka@xxxxxxxxx>
L: linux-tegra@xxxxxxxxxxxxxxx
S: Supported
+F: drivers/iommu/arm/arm-smmu-v3/nvidia-smmu-v3.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
F: drivers/iommu/tegra*

diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 1f5838d3351b..0aa84c0a50ea 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
-arm_smmu_v3-objs-y += arm-smmu-v3.o arm-smmu-v3-impl.o
+arm_smmu_v3-objs-y += arm-smmu-v3.o arm-smmu-v3-impl.o nvidia-smmu-v3.o
arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-impl.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-impl.c
index 6947d28067a8..37d062e40eb5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-impl.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-impl.c
@@ -4,5 +4,12 @@

struct arm_smmu_device *arm_smmu_v3_impl_init(struct arm_smmu_device *smmu)
{
+ /*
+ * The NVIDIA implementation supports ACPI only, so its init() is
+ * called unconditionally and looks for a companion ACPI device.
+ * nvidia_smmu_v3_impl_init() hands back the smmu pointer it was
+ * given when no companion CMDQV device is found, a pointer to the
+ * reallocated arm_smmu_device when one is found and probed, and an
+ * ERR_PTR() when probing the CMDQV device fails.
+ */
+ smmu = nvidia_smmu_v3_impl_init(smmu);
+
return smmu;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 510e1493fd5a..1b9459592f76 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -335,8 +335,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return 0;
}

-static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu, u64 *cmds, int n)
{
+ if (smmu->impl && smmu->impl->get_cmdq)
+ return smmu->impl->get_cmdq(smmu, cmds, n);
+
return &smmu->cmdq;
}

@@ -742,7 +745,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
u32 prod;
unsigned long flags;
bool owner;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
+ struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu, cmds, n);
struct arm_smmu_ll_queue llq, head;
int ret = 0;

@@ -3487,6 +3490,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
return ret;
}

+ if (smmu->impl && smmu->impl->device_reset) {
+ ret = smmu->impl->device_reset(smmu);
+ if (ret) {
+ dev_err(smmu->dev, "failed at implementation specific device_reset\n");
+ return ret;
+ }
+ }
+
return 0;
}

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index c65c39336916..bb903a7fa662 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -647,6 +647,8 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
u32 options;

+ const struct arm_smmu_impl *impl;
+
struct arm_smmu_cmdq cmdq;
struct arm_smmu_evtq evtq;
struct arm_smmu_priq priq;
@@ -812,6 +814,12 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {}
#endif /* CONFIG_ARM_SMMU_V3_SVA */

/* Implementation details */
+struct arm_smmu_impl { /* optional per-implementation hooks; callers NULL-check each one */
+ int (*device_reset)(struct arm_smmu_device *smmu); /* run last in arm_smmu_device_reset(); non-zero fails the reset */
+ struct arm_smmu_cmdq *(*get_cmdq)(struct arm_smmu_device *smmu, u64 *cmds, int n); /* picks the queue for a list of n commands */
+};
+
struct arm_smmu_device *arm_smmu_v3_impl_init(struct arm_smmu_device *smmu);
+struct arm_smmu_device *nvidia_smmu_v3_impl_init(struct arm_smmu_device *smmu);

#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/nvidia-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/nvidia-smmu-v3.c
new file mode 100644
index 000000000000..0c92fe433c6e
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/nvidia-smmu-v3.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define dev_fmt(fmt) "nvidia_smmu_cmdqv: " fmt
+
+#include <linux/acpi.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+
+#include <acpi/acpixf.h>
+
+#include "arm-smmu-v3.h"
+
+#define NVIDIA_SMMU_CMDQV_HID "NVDA0600" /* ACPI _HID used to look up the companion CMDQV device */
+
+/*
+ * CMDQV register page base and size defines: a 64K config page at offset 0,
+ * followed by two 64K VCMDQ pages.
+ */
+#define NVIDIA_CMDQV_CONFIG_BASE (0)
+#define NVIDIA_CMDQV_CONFIG_SIZE (SZ_64K)
+#define NVIDIA_VCMDQ_BASE (0 + SZ_64K)
+#define NVIDIA_VCMDQ_SIZE (SZ_64K * 2) /* PAGE0 and PAGE1 */
+
+/* CMDQV global config regs (offsets from cmdqv_base) */
+#define NVIDIA_CMDQV_CONFIG 0x0000
+#define CMDQV_EN BIT(0)
+
+#define NVIDIA_CMDQV_PARAM 0x0004
+#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8)
+#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4)
+
+#define NVIDIA_CMDQV_STATUS 0x0008
+#define CMDQV_STATUS GENMASK(2, 1)
+#define CMDQV_ENABLED BIT(0)
+
+#define NVIDIA_CMDQV_VINTF_ERR_MAP 0x000C
+#define NVIDIA_CMDQV_VINTF_INT_MASK 0x0014
+#define NVIDIA_CMDQV_VCMDQ_ERR_MAP 0x001C
+
+#define NVIDIA_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q))
+#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15)
+#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1)
+#define CMDQV_CMDQ_ALLOCATED BIT(0)
+
+/* VINTF config regs (NVIDIA_VINTF_* are offsets from a VINTF's own base) */
+#define NVIDIA_CMDQV_VINTF(v) (0x1000 + 0x100*(v))
+
+#define NVIDIA_VINTF_CONFIG 0x0000
+#define VINTF_HYP_OWN BIT(17)
+#define VINTF_VMID GENMASK(16, 1)
+#define VINTF_EN BIT(0)
+
+#define NVIDIA_VINTF_STATUS 0x0004
+#define VINTF_STATUS GENMASK(3, 1)
+#define VINTF_ENABLED BIT(0)
+
+/* VCMDQ config regs (NVIDIA_VCMDQ_* are offsets from a VCMDQ's PAGE0 base) */
+/* -- PAGE0 -- */
+#define NVIDIA_CMDQV_VCMDQ(q) (NVIDIA_VCMDQ_BASE + 0x80*(q))
+
+#define NVIDIA_VCMDQ_CONS 0x00000
+#define VCMDQ_CONS_ERR GENMASK(30, 24)
+
+#define NVIDIA_VCMDQ_PROD 0x00004
+
+#define NVIDIA_VCMDQ_CONFIG 0x00008
+#define VCMDQ_EN BIT(0)
+
+#define NVIDIA_VCMDQ_STATUS 0x0000C
+#define VCMDQ_ENABLED BIT(0)
+
+#define NVIDIA_VCMDQ_GERROR 0x00010
+#define NVIDIA_VCMDQ_GERRORN 0x00014
+
+/* -- PAGE1: same per-queue stride, one 64K page above PAGE0 -- */
+#define NVIDIA_VCMDQ_BASE_L(q) (NVIDIA_CMDQV_VCMDQ(q) + SZ_64K)
+#define VCMDQ_ADDR GENMASK(63, 5)
+#define VCMDQ_LOG2SIZE GENMASK(4, 0)
+
+struct nvidia_smmu_vintf { /* software state for one VCMDQ interface (VINTF) */
+ u16 idx; /* VINTF index; selects this VINTF's bit in the VINTF error map */
+ u32 cfg; /* last value written to NVIDIA_VINTF_CONFIG */
+ u32 status; /* cached NVIDIA_VINTF_STATUS; VCMDQs are used only while it equals VINTF_ENABLED */
+
+ void __iomem *base; /* this VINTF's register base */
+ struct arm_smmu_cmdq *vcmdqs; /* array of num_vcmdqs_per_vintf command queues */
+};
+
+struct nvidia_smmu { /* arm_smmu_device extended with CMDQV state */
+ struct arm_smmu_device smmu; /* must stay first: nvidia_smmu_create() grows the arm_smmu_device allocation into this struct */
+
+ struct device *cmdqv_dev; /* companion CMDQV platform device */
+ void __iomem *cmdqv_base; /* mapping of the CMDQV device's first MEM resource */
+ int cmdqv_irq; /* 0 when no interrupt could be obtained */
+
+ /* CMDQV Hardware Params */
+ u16 num_total_vintfs; /* 1 << CMDQV_NUM_VINTF_LOG2 */
+ u16 num_total_vcmdqs; /* 1 << CMDQV_NUM_VCMDQ_LOG2 */
+ u16 num_vcmdqs_per_vintf; /* num_total_vcmdqs / num_total_vintfs */
+
+ /* CMDQV_VINTF(0) reserved for host kernel use */
+ struct nvidia_smmu_vintf vintf0;
+};
+
+/*
+ * CMDQV error interrupt handler.
+ *
+ * Reads the VINTF and VCMDQ error maps and logs them. If VINTF0's bit is
+ * set, its cached status is refreshed from NVIDIA_VINTF_STATUS so that
+ * nvidia_smmu_get_cmdq() stops handing out its VCMDQs once the status is
+ * no longer VINTF_ENABLED.
+ *
+ * @irq:   interrupt number (unused)
+ * @devid: the struct nvidia_smmu passed to devm_request_irq()
+ *
+ * The line is requested with IRQF_SHARED, so IRQ_NONE is returned (and
+ * nothing is logged) when no error bit is set in any map; otherwise
+ * IRQ_HANDLED is returned.
+ */
+static irqreturn_t nvidia_smmu_cmdqv_isr(int irq, void *devid)
+{
+	struct nvidia_smmu *nsmmu = devid;
+	struct nvidia_smmu_vintf *vintf0 = &nsmmu->vintf0;
+	u32 vintf_err_map[2];
+	u32 vcmdq_err_map[4];
+	u32 pending = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(vintf_err_map); i++) {
+		vintf_err_map[i] = readl_relaxed(nsmmu->cmdqv_base +
+						 NVIDIA_CMDQV_VINTF_ERR_MAP + 0x4 * i);
+		pending |= vintf_err_map[i];
+	}
+
+	for (i = 0; i < ARRAY_SIZE(vcmdq_err_map); i++) {
+		vcmdq_err_map[i] = readl_relaxed(nsmmu->cmdqv_base +
+						 NVIDIA_CMDQV_VCMDQ_ERR_MAP + 0x4 * i);
+		pending |= vcmdq_err_map[i];
+	}
+
+	/* Shared line: with no error flagged, the interrupt came from another device */
+	if (!pending)
+		return IRQ_NONE;
+
+	dev_warn(nsmmu->cmdqv_dev,
+		 "unexpected cmdqv error reported: vintf_map %08X %08X, vcmdq_map %08X %08X %08X %08X\n",
+		 vintf_err_map[0], vintf_err_map[1], vcmdq_err_map[0], vcmdq_err_map[1],
+		 vcmdq_err_map[2], vcmdq_err_map[3]);
+
+	/* If the error was reported by vintf0, avoid using any of its VCMDQs */
+	if (vintf_err_map[vintf0->idx / 32] & BIT(vintf0->idx % 32)) {
+		vintf0->status = readl_relaxed(vintf0->base + NVIDIA_VINTF_STATUS);
+
+		dev_warn(nsmmu->cmdqv_dev, "error (0x%lX) reported by host vintf0 - disabling its vcmdqs\n",
+			 FIELD_GET(VINTF_STATUS, vintf0->status));
+	} else if (vintf_err_map[0] || vintf_err_map[1]) {
+		dev_err(nsmmu->cmdqv_dev, "cmdqv error interrupt triggered by unassigned vintf!\n");
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Adapt struct arm_smmu_cmdq init sequences from arm-smmu-v3.c for VCMDQs.
+ *
+ * Fills in the software state of one VCMDQ: a 64K DMA-coherent queue buffer,
+ * the PROD/CONS register pointers, the q_base value to be programmed into
+ * the hardware, and the valid bitmap used by the command-queue code.
+ *
+ * @nsmmu:      owning device; allocations are managed by nsmmu->cmdqv_dev
+ * @cmdq:       queue structure to initialise
+ * @vcmdq_base: PAGE0 register base of this VCMDQ
+ * @qidx:       VCMDQ index, used for log messages only
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails.
+ */
+static int nvidia_smmu_init_one_arm_smmu_cmdq(struct nvidia_smmu *nsmmu,
+					      struct arm_smmu_cmdq *cmdq,
+					      void __iomem *vcmdq_base,
+					      u16 qidx)
+{
+	struct arm_smmu_queue *q = &cmdq->q;
+	unsigned int nents;
+	size_t qsz;
+
+	/* struct arm_smmu_cmdq config normally done in arm_smmu_device_hw_probe() */
+	q->llq.max_n_shift = ilog2(SZ_64K >> CMDQ_ENT_SZ_SHIFT);
+	nents = 1 << q->llq.max_n_shift;
+
+	/* struct arm_smmu_cmdq config normally done in arm_smmu_init_one_queue() */
+	qsz = (size_t)nents << CMDQ_ENT_SZ_SHIFT;
+	q->base = dmam_alloc_coherent(nsmmu->cmdqv_dev, qsz, &q->base_dma, GFP_KERNEL);
+	if (!q->base) {
+		dev_err(nsmmu->cmdqv_dev, "failed to allocate 0x%zX bytes for VCMDQ%u\n",
+			qsz, qidx);
+		return -ENOMEM;
+	}
+	dev_dbg(nsmmu->cmdqv_dev, "allocated %u entries for VCMDQ%u @ 0x%llX [%pad] ++ %zX\n",
+		nents, qidx, (u64)q->base, &q->base_dma, qsz);
+
+	q->prod_reg = vcmdq_base + NVIDIA_VCMDQ_PROD;
+	q->cons_reg = vcmdq_base + NVIDIA_VCMDQ_CONS;
+	q->ent_dwords = CMDQ_ENT_DWORDS;
+
+	/* Queue address in the upper bits, log2 of the entry count in the low bits */
+	q->q_base = q->base_dma & VCMDQ_ADDR;
+	q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
+
+	q->llq.prod = q->llq.cons = 0;
+
+	/* struct arm_smmu_cmdq config normally done in arm_smmu_cmdq_init() */
+	atomic_set(&cmdq->owner_prod, 0);
+	atomic_set(&cmdq->lock, 0);
+
+	/*
+	 * One zeroed bit per queue entry. Device-managed like the queue buffer
+	 * above, so it is released together with it instead of being leaked.
+	 */
+	cmdq->valid_map = (atomic_long_t *)devm_kcalloc(nsmmu->cmdqv_dev,
+							BITS_TO_LONGS(nents),
+							sizeof(unsigned long),
+							GFP_KERNEL);
+	if (!cmdq->valid_map) {
+		dev_err(nsmmu->cmdqv_dev, "failed to allocate valid_map for VCMDQ%u\n", qidx);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Bring up VINTF0 for the host kernel and enable the VCMDQs assigned to it.
+ *
+ * Enables VINTF0 (with HYP_OWN set when the hardware reports more than one
+ * VINTF), allocates the first num_vcmdqs_per_vintf VCMDQs to it, then builds
+ * and enables a struct arm_smmu_cmdq for each of them.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the poll-timeout
+ * error if VINTF0 or one of the VCMDQs does not report itself enabled.
+ */
+static int nvidia_smmu_cmdqv_init(struct nvidia_smmu *nsmmu)
+{
+	struct nvidia_smmu_vintf *vintf0 = &nsmmu->vintf0;
+	u32 regval;
+	u16 qidx;
+	int ret;
+
+	/* Setup vintf0 for host kernel */
+	vintf0->idx = 0;
+	vintf0->base = nsmmu->cmdqv_base + NVIDIA_CMDQV_VINTF(0);
+
+	/* The ownership bit is written first; the enable bit follows in a second write */
+	regval = FIELD_PREP(VINTF_HYP_OWN, nsmmu->num_total_vintfs > 1);
+	writel_relaxed(regval, vintf0->base + NVIDIA_VINTF_CONFIG);
+
+	regval |= FIELD_PREP(VINTF_EN, 1);
+	writel_relaxed(regval, vintf0->base + NVIDIA_VINTF_CONFIG);
+
+	vintf0->cfg = regval;
+
+	ret = readl_relaxed_poll_timeout(vintf0->base + NVIDIA_VINTF_STATUS,
+					 regval, regval == VINTF_ENABLED,
+					 1, ARM_SMMU_POLL_TIMEOUT_US);
+	/* Cache the status even on timeout so get_cmdq() sees VINTF0 as unusable */
+	vintf0->status = regval;
+	if (ret) {
+		dev_err(nsmmu->cmdqv_dev, "failed to enable VINTF%u: STATUS = 0x%08X\n",
+			vintf0->idx, regval);
+		return ret;
+	}
+
+	/* Allocate vcmdqs to vintf0 */
+	for (qidx = 0; qidx < nsmmu->num_vcmdqs_per_vintf; qidx++) {
+		regval  = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, vintf0->idx);
+		regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, qidx);
+		regval |= CMDQV_CMDQ_ALLOCATED;
+		writel_relaxed(regval, nsmmu->cmdqv_base + NVIDIA_CMDQV_CMDQ_ALLOC(qidx));
+	}
+
+	/* Build an arm_smmu_cmdq for each vcmdq allocated to vintf0 */
+	vintf0->vcmdqs = devm_kcalloc(nsmmu->cmdqv_dev, nsmmu->num_vcmdqs_per_vintf,
+				      sizeof(*vintf0->vcmdqs), GFP_KERNEL);
+	if (!vintf0->vcmdqs)
+		return -ENOMEM;
+
+	for (qidx = 0; qidx < nsmmu->num_vcmdqs_per_vintf; qidx++) {
+		void __iomem *vcmdq_base = nsmmu->cmdqv_base + NVIDIA_CMDQV_VCMDQ(qidx);
+		struct arm_smmu_cmdq *cmdq = &vintf0->vcmdqs[qidx];
+
+		/*
+		 * Setup struct arm_smmu_cmdq data members. Stop on failure:
+		 * without a queue buffer there is no valid q_base to program
+		 * into the hardware below.
+		 */
+		ret = nvidia_smmu_init_one_arm_smmu_cmdq(nsmmu, cmdq, vcmdq_base, qidx);
+		if (ret)
+			return ret;
+
+		/* Configure and enable the vcmdq */
+		writel_relaxed(0, vcmdq_base + NVIDIA_VCMDQ_PROD);
+		writel_relaxed(0, vcmdq_base + NVIDIA_VCMDQ_CONS);
+
+		writeq_relaxed(cmdq->q.q_base, nsmmu->cmdqv_base + NVIDIA_VCMDQ_BASE_L(qidx));
+
+		writel_relaxed(VCMDQ_EN, vcmdq_base + NVIDIA_VCMDQ_CONFIG);
+		ret = readl_poll_timeout(vcmdq_base + NVIDIA_VCMDQ_STATUS,
+					 regval, regval == VCMDQ_ENABLED,
+					 1, ARM_SMMU_POLL_TIMEOUT_US);
+		if (ret) {
+			u32 gerror = readl_relaxed(vcmdq_base + NVIDIA_VCMDQ_GERROR);
+			u32 gerrorn = readl_relaxed(vcmdq_base + NVIDIA_VCMDQ_GERRORN);
+			u32 cons = readl_relaxed(vcmdq_base + NVIDIA_VCMDQ_CONS);
+
+			dev_err(nsmmu->cmdqv_dev,
+				"failed to enable VCMDQ%u: GERROR=0x%X, GERRORN=0x%X, CONS=0x%X\n",
+				qidx, gerror, gerrorn, cons);
+			return ret;
+		}
+
+		dev_info(nsmmu->cmdqv_dev, "VCMDQ%u allocated to VINTF%u as logical-VCMDQ%u\n",
+			 qidx, vintf0->idx, qidx);
+	}
+
+	return 0;
+}
+
+/*
+ * Map the CMDQV registers, look up its interrupt and read the hardware
+ * parameters into @nsmmu.
+ *
+ * A missing interrupt is not fatal: cmdqv_irq is set to 0 and a warning is
+ * logged. Returns 0 on success, -ENXIO when the device has no MEM resource,
+ * the ioremap error, or -ENODEV when the hardware is disabled or not ready.
+ */
+static int nvidia_smmu_probe(struct nvidia_smmu *nsmmu)
+{
+	struct device *dev = nsmmu->cmdqv_dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *mem;
+	u32 val;
+	int irq;
+
+	/* Register window */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem)
+		return -ENXIO;
+
+	nsmmu->cmdqv_base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(nsmmu->cmdqv_base))
+		return PTR_ERR(nsmmu->cmdqv_base);
+
+	/* Error interrupt; 0 means "none" to the rest of the driver */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_warn(dev, "no cmdqv interrupt - errors will not be reported\n");
+		irq = 0;
+	}
+	nsmmu->cmdqv_irq = irq;
+
+	/* The block must already be switched on ... */
+	val = readl_relaxed(nsmmu->cmdqv_base + NVIDIA_CMDQV_CONFIG);
+	if (!(val & CMDQV_EN)) {
+		dev_err(dev, "CMDQV h/w is disabled: CMDQV_CONFIG=0x%08X\n", val);
+		return -ENODEV;
+	}
+
+	/* ... and report ENABLED with every CMDQV_STATUS bit clear */
+	val = readl_relaxed(nsmmu->cmdqv_base + NVIDIA_CMDQV_STATUS);
+	if ((val & (CMDQV_ENABLED | CMDQV_STATUS)) != CMDQV_ENABLED) {
+		dev_err(dev, "CMDQV h/w not ready: CMDQV_STATUS=0x%08X\n", val);
+		return -ENODEV;
+	}
+
+	/* Both counts are reported as log2 values */
+	val = readl_relaxed(nsmmu->cmdqv_base + NVIDIA_CMDQV_PARAM);
+	nsmmu->num_total_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, val);
+	nsmmu->num_total_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, val);
+	nsmmu->num_vcmdqs_per_vintf = nsmmu->num_total_vcmdqs / nsmmu->num_total_vintfs;
+
+	return 0;
+}
+
+/*
+ * Pick the command queue for a command list.
+ *
+ * @smmu: the arm_smmu_device embedded at the start of a struct nvidia_smmu
+ * @cmds: the command list about to be issued
+ * @n:    number of commands in @cmds; 0 is treated as a lone CMD_SYNC
+ *
+ * Returns one of VINTF0's VCMDQs, or the architected &smmu->cmdq when
+ * VINTF0 is not enabled and healthy, when it owns no VCMDQ, or when VINTF0
+ * is not hypervisor-owned and the first command is not one of the opcodes
+ * listed below.
+ */
+static struct arm_smmu_cmdq *nvidia_smmu_get_cmdq(struct arm_smmu_device *smmu, u64 *cmds, int n)
+{
+	struct nvidia_smmu *nsmmu = (struct nvidia_smmu *)smmu;
+	struct nvidia_smmu_vintf *vintf0 = &nsmmu->vintf0;
+	u16 qidx;
+
+	/* Make sure vintf0 is enabled and healthy */
+	if (vintf0->status != VINTF_ENABLED)
+		return &smmu->cmdq;
+
+	/* Nothing to pick from; this also keeps the modulo below well defined */
+	if (!nsmmu->num_vcmdqs_per_vintf)
+		return &smmu->cmdq;
+
+	/* Check for illegal CMDs */
+	if (!FIELD_GET(VINTF_HYP_OWN, vintf0->cfg)) {
+		u64 opcode = (n) ? FIELD_GET(CMDQ_0_OP, cmds[0]) : CMDQ_OP_CMD_SYNC;
+
+		/* List all non-illegal CMDs for cmdq overriding */
+		switch (opcode) {
+		case CMDQ_OP_TLBI_NH_ASID:
+		case CMDQ_OP_TLBI_NH_VA:
+		case CMDQ_OP_TLBI_S12_VMALL:
+		case CMDQ_OP_TLBI_S2_IPA:
+		case CMDQ_OP_ATC_INV:
+			break;
+		default:
+			/* Skip overriding for illegal CMDs */
+			return &smmu->cmdq;
+		}
+	}
+
+	/*
+	 * Select a vcmdq to use. Here we use a temporary solution to
+	 * balance out traffic on cmdq issuing: each cmdq has its own
+	 * lock, if all cpus issue cmdlist using the same cmdq, only
+	 * one CPU at a time can enter the process, while the others
+	 * will be spinning at the same lock.
+	 *
+	 * The CPU number is only a load-spreading hint, so a stale value
+	 * after migration is harmless. The raw accessor is used because
+	 * the caller evaluates this before it disables interrupts.
+	 */
+	qidx = raw_smp_processor_id() % nsmmu->num_vcmdqs_per_vintf;
+	return &vintf0->vcmdqs[qidx];
+}
+
+/*
+ * Implementation-specific part of the SMMU reset: initialise the CMDQV
+ * hardware, hook up its error interrupt when one is available, and turn off
+ * the MSI-based CMD_SYNC features on @smmu.
+ *
+ * Returns 0 on success or the error from CMDQV init / devm_request_irq().
+ */
+static int nvidia_smmu_device_reset(struct arm_smmu_device *smmu)
+{
+	struct nvidia_smmu *nsmmu = (struct nvidia_smmu *)smmu;
+	int irq;
+	int err;
+
+	err = nvidia_smmu_cmdqv_init(nsmmu);
+	if (err)
+		return err;
+
+	/* cmdqv_irq == 0 means there is no interrupt to claim */
+	irq = nsmmu->cmdqv_irq;
+	if (irq) {
+		err = devm_request_irq(nsmmu->cmdqv_dev, irq, nvidia_smmu_cmdqv_isr,
+				       IRQF_SHARED, "nvidia-smmu-cmdqv", nsmmu);
+		if (err) {
+			dev_err(nsmmu->cmdqv_dev, "failed to claim irq (%d): %d\n",
+				irq, err);
+			return err;
+		}
+	}
+
+	/* VCMDQs only support CMD_SYNC w/CS_NONE, so drop OPT_MSIPOLL and FEAT_MSI */
+	smmu->options &= ~ARM_SMMU_OPT_MSIPOLL;
+	smmu->features &= ~ARM_SMMU_FEAT_MSI;
+
+	return 0;
+}
+
+const struct arm_smmu_impl nvidia_smmu_impl = { /* hooks installed on the smmu by nvidia_smmu_v3_impl_init() */
+ .device_reset = nvidia_smmu_device_reset,
+ .get_cmdq = nvidia_smmu_get_cmdq,
+};
+
+#ifdef CONFIG_ACPI
+/*
+ * Look for a companion CMDQV device and, if there is one, grow @smmu into a
+ * struct nvidia_smmu.
+ *
+ * The companion is the ACPI device with _HID NVIDIA_SMMU_CMDQV_HID whose
+ * _UID equals the identifier of the SMMU's IORT node.
+ *
+ * Returns NULL when ACPI is disabled or no companion device exists (@smmu is
+ * left untouched), ERR_PTR(-ENOMEM) on allocation failure (@smmu is still
+ * valid), or the new struct nvidia_smmu. On success the memory behind @smmu
+ * has been reallocated and the caller must switch to &nsmmu->smmu. A
+ * reference on the CMDQV device is kept in nsmmu->cmdqv_dev.
+ *
+ * NOTE(review): any other holder of the old @smmu pointer would be left
+ * dangling by the reallocation - confirm this runs before the pointer is
+ * published anywhere.
+ */
+struct nvidia_smmu *nvidia_smmu_create(struct arm_smmu_device *smmu)
+{
+	struct nvidia_smmu *nsmmu;
+	struct acpi_iort_node **pnode;
+	struct acpi_device *adev;
+	struct device *cmdqv_dev;
+	const char *match_uid;
+
+	if (acpi_disabled)
+		return NULL;
+
+	/* Without an IORT node there is nothing to match a companion against */
+	pnode = dev_get_platdata(smmu->dev);
+	if (!pnode || !*pnode)
+		return NULL;
+
+	/* Look for a device in the DSDT whose _UID matches the SMMU's iort_node identifier */
+	match_uid = kasprintf(GFP_KERNEL, "%u", (*pnode)->identifier);
+	if (!match_uid)
+		return ERR_PTR(-ENOMEM);	/* a NULL uid would match any device with this HID */
+
+	adev = acpi_dev_get_first_match_dev(NVIDIA_SMMU_CMDQV_HID, match_uid, -1);
+	kfree(match_uid);
+
+	if (!adev)
+		return NULL;
+
+	cmdqv_dev = bus_find_device_by_acpi_dev(&platform_bus_type, adev);
+	/* Only the platform device is needed from here on; drop the lookup reference */
+	acpi_dev_put(adev);
+	if (!cmdqv_dev)
+		return NULL;
+
+	dev_info(smmu->dev, "found companion CMDQV device, %s\n", dev_name(cmdqv_dev));
+
+	nsmmu = devm_krealloc(smmu->dev, smmu, sizeof(*nsmmu), GFP_KERNEL);
+	if (!nsmmu) {
+		put_device(cmdqv_dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * The reallocation preserves only the arm_smmu_device contents. Clear
+	 * the CMDQV tail so that fields such as vintf0.status read as "not
+	 * enabled" until the CMDQV is initialised.
+	 */
+	memset((u8 *)nsmmu + sizeof(nsmmu->smmu), 0, sizeof(*nsmmu) - sizeof(nsmmu->smmu));
+
+	nsmmu->cmdqv_dev = cmdqv_dev;
+
+	return nsmmu;
+}
+#else
+/* Without ACPI support there is never a companion CMDQV device. */
+struct nvidia_smmu *nvidia_smmu_create(struct arm_smmu_device *smmu)
+{
+	return NULL;
+}
+#endif
+
+/*
+ * Attach the NVIDIA CMDQV implementation to @smmu if a companion CMDQV
+ * device exists.
+ *
+ * Returns @smmu unchanged when there is no companion device, the
+ * arm_smmu_device embedded in the newly created struct nvidia_smmu (with
+ * ->impl set) on success, or an ERR_PTR() when creating or probing the
+ * CMDQV device fails.
+ */
+struct arm_smmu_device *nvidia_smmu_v3_impl_init(struct arm_smmu_device *smmu)
+{
+	struct nvidia_smmu *nsmmu;
+	int ret;
+
+	nsmmu = nvidia_smmu_create(smmu);
+	if (!nsmmu)
+		return smmu;
+
+	/* nvidia_smmu_create() reports allocation failure as an ERR_PTR(), not NULL */
+	if (IS_ERR(nsmmu))
+		return ERR_CAST(nsmmu);
+
+	ret = nvidia_smmu_probe(nsmmu);
+	if (ret) {
+		/* Drop the device reference taken when the companion was looked up */
+		put_device(nsmmu->cmdqv_dev);
+		return ERR_PTR(ret);
+	}
+
+	nsmmu->smmu.impl = &nvidia_smmu_impl;
+
+	return &nsmmu->smmu;
+}
--
2.17.1