Re: [PATCH 5/5] x86/kvm: Add KVM_FEATURE_MSI_EXT_DEST_ID

From: David Woodhouse
Date: Mon Oct 12 2020 - 12:06:52 EST


On Mon, 2020-10-12 at 11:33 +0200, Thomas Gleixner wrote:
> On Sun, Oct 11 2020 at 22:15, David Woodhouse wrote:
> > On 11 October 2020 18:12:08 BST, Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
> > > On Sat, Oct 10 2020 at 12:58, David Woodhouse wrote:
> > > > On 10 October 2020 12:44:10 BST, Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> > >
> > > wrote:
> > > > Yeah. There's some muttering to be done for HPET about whether it's
> > > > *its* MSI domain or whether it's the parent domain. But I'll have a
> > > > play. I think we'll be able to drop the whole
> > > > irq_remapping_get_irq_domain() thing.
> > >
> > > That would be really nice.
> >
> > I can make it work for HPET if I fix up the point at which the IRQ
> > remapping code registers a notifier on the platform bus. (At IRQ remap
> > setup time is too early; when it registers the PCI bus notifier is too
> > late.)
> >
> > IOAPIC is harder though as the platform bus doesn't even exist that
> > early. Maybe an early platform bus is possible but it would have to
> > turn out particularly simple to do, or I'd need to find another use
> > case too, to justify it. Will continue to play....
>
> You might want to look into using irq_find_matching_fwspec() instead for
> both HPET and IOAPIC. That needs a select() callback implemented in the
> remapping domains.

That works. Pushed (along with that trivial HPET MSI fixup) to
https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/ext_dest_id

Briefly tested with I/OAPIC with and without Intel remapping in qemu,
not tested for HPET because I haven't worked out how to get qemu to do
HPET-MSI yet.

David Woodhouse (8):
genirq/irqdomain: Implement get_name() method on irqchip fwnodes
x86/apic: Add select() method on vector irqdomain
iommu/amd: Implement select() method on remapping irqdomain
iommu/vt-d: Implement select() method on remapping irqdomain
iommu/hyper-v: Implement select() method on remapping irqdomain
x86/hpet: Use irq_find_matching_fwspec() to find remapping irqdomain
x86/ioapic: Use irq_find_matching_fwspec() to find remapping irqdomain
x86: Kill all traces of irq_remapping_get_irq_domain()

arch/x86/include/asm/hw_irq.h | 2 --
arch/x86/include/asm/irq_remapping.h | 9 ------
arch/x86/include/asm/irqdomain.h | 3 ++
arch/x86/kernel/apic/io_apic.c | 21 ++++++--------
arch/x86/kernel/apic/vector.c | 52 +++++++++++++++++++++++++++++++++++
arch/x86/kernel/hpet.c | 23 +++++++++-------
drivers/iommu/amd/iommu.c | 53 +++++++++++++-----------------------
drivers/iommu/hyperv-iommu.c | 18 ++++++------
drivers/iommu/intel/irq_remapping.c | 30 +++++++++-----------
drivers/iommu/irq_remapping.c | 14 ----------
drivers/iommu/irq_remapping.h | 3 --
kernel/irq/irqdomain.c | 11 +++++++-
12 files changed, 128 insertions(+), 111 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index aabd8f1b6bb0..aef795f17478 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -40,8 +40,6 @@ enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_PCI_MSIX,
X86_IRQ_ALLOC_TYPE_DMAR,
X86_IRQ_ALLOC_TYPE_UV,
- X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT,
- X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT,
};

struct ioapic_alloc_info {
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index af4a151d70b3..7cc49432187f 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -44,9 +44,6 @@ extern int irq_remapping_reenable(int);
extern int irq_remap_enable_fault_handling(void);
extern void panic_if_irq_remap(const char *msg);

-extern struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info);
-
/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
extern struct irq_domain *
arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id);
@@ -71,11 +68,5 @@ static inline void panic_if_irq_remap(const char *msg)
{
}

-static inline struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info)
-{
- return NULL;
-}
-
#endif /* CONFIG_IRQ_REMAP */
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index cd684d45cb5f..2fc85f523ace 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -12,6 +12,9 @@ enum {
X86_IRQ_ALLOC_LEGACY = 0x2,
};

+int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec);
+int x86_fwspec_is_hpet(struct irq_fwspec *fwspec);
+
extern struct irq_domain *x86_vector_domain;

extern void init_irq_alloc_info(struct irq_alloc_info *info,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ca2da19d5c55..f6a5b9f887aa 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2305,36 +2305,33 @@ static inline void __init check_timer(void)

static int mp_irqdomain_create(int ioapic)
{
- struct irq_alloc_info info;
struct irq_domain *parent;
int hwirqs = mp_ioapic_pin_count(ioapic);
struct ioapic *ip = &ioapics[ioapic];
struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
struct fwnode_handle *fn;
- char *name = "IO-APIC";
+ struct irq_fwspec fwspec;

if (cfg->type == IOAPIC_DOMAIN_INVALID)
return 0;

- init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT;
- info.devid = mpc_ioapic_id(ioapic);
- parent = irq_remapping_get_irq_domain(&info);
- if (!parent)
- parent = x86_vector_domain;
- else
- name = "IO-APIC-IR";
-
/* Handle device tree enumerated APICs proper */
if (cfg->dev) {
fn = of_node_to_fwnode(cfg->dev);
} else {
- fn = irq_domain_alloc_named_id_fwnode(name, ioapic);
+ fn = irq_domain_alloc_named_id_fwnode("IO-APIC", ioapic);
if (!fn)
return -ENOMEM;
}

+ fwspec.fwnode = fn;
+ fwspec.param_count = 1;
+ fwspec.param[0] = ioapic;
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
+ if (!parent)
+ return -ENODEV;
+
ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
(void *)(long)ioapic);

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index bb2e2a2488a5..3f0485c12b13 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -636,7 +636,59 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
}
#endif

+
+int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec)
+{
+ if (fwspec->param_count != 1)
+ return 0;
+
+ if (is_fwnode_irqchip(fwspec->fwnode)){
+ const char *fwname = fwnode_get_name(fwspec->fwnode);
+
+ if (!strncmp(fwname, "IO-APIC-", 8) &&
+ simple_strtol(fwname+8, NULL, 10) == fwspec->param[0])
+ return 1;
+#ifdef CONFIG_OF
+ } else if (to_of_node(fwspec->fwnode) &&
+ of_device_is_compatible(to_of_node(fwspec->fwnode),
+ "intel,ce4100-ioapic")) {
+ return 1;
+#endif
+ }
+ return 0;
+}
+
+int x86_fwspec_is_hpet(struct irq_fwspec *fwspec)
+{
+ if (fwspec->param_count != 1)
+ return 0;
+
+ if (is_fwnode_irqchip(fwspec->fwnode)){
+ const char *fwname = fwnode_get_name(fwspec->fwnode);
+
+ if (!strncmp(fwname, "HPET-MSI-", 9) &&
+ simple_strtol(fwname+9, NULL, 10) == fwspec->param[0])
+ return 1;
+ }
+ return 0;
+}
+
+static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ /*
+ * HPET and I/OAPIC cannot be parented in the vector domain
+ * if IRQ remapping is enabled. APIC IDs above 15 bits are
+ * only permitted if IRQ remapping is enabled, so check that.
+ */
+ if (apic->apic_id_valid(32768))
+ return 0;
+
+ return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec);
+}
+
static const struct irq_domain_ops x86_vector_domain_ops = {
+ .select = x86_vector_select,
.alloc = x86_vector_alloc_irqs,
.free = x86_vector_free_irqs,
.activate = x86_vector_activate,
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3b8b12769f3b..fb7736ca7b5b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -543,8 +543,8 @@ static struct irq_domain *hpet_create_irq_domain(int hpet_id)
{
struct msi_domain_info *domain_info;
struct irq_domain *parent, *d;
- struct irq_alloc_info info;
struct fwnode_handle *fn;
+ struct irq_fwspec fwspec;

if (x86_vector_domain == NULL)
return NULL;
@@ -556,15 +556,6 @@ static struct irq_domain *hpet_create_irq_domain(int hpet_id)
*domain_info = hpet_msi_domain_info;
domain_info->data = (void *)(long)hpet_id;

- init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT;
- info.devid = hpet_id;
- parent = irq_remapping_get_irq_domain(&info);
- if (parent == NULL)
- parent = x86_vector_domain;
- else
- hpet_msi_controller.name = "IR-HPET-MSI";
-
fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name,
hpet_id);
if (!fn) {
@@ -572,6 +563,18 @@ static struct irq_domain *hpet_create_irq_domain(int hpet_id)
return NULL;
}

+ fwspec.fwnode = fn;
+ fwspec.param_count = 1;
+ fwspec.param[0] = hpet_id;
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
+ if (!parent) {
+ irq_domain_free_fwnode(fn);
+ kfree(domain_info);
+ return NULL;
+ }
+ if (parent != x86_vector_domain)
+ hpet_msi_controller.name = "IR-HPET-MSI";
+
d = msi_create_irq_domain(fn, domain_info, parent);
if (!d) {
irq_domain_free_fwnode(fn);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 13d0a8f42d56..16adbf9d8fbb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3536,10 +3536,8 @@ static int get_devid(struct irq_alloc_info *info)
{
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
return get_ioapic_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_HPET:
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
return get_hpet_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
@@ -3550,46 +3548,32 @@ static int get_devid(struct irq_alloc_info *info)
}
}

-static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info,
- int devid)
-{
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
- if (!iommu)
- return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
- return iommu->ir_domain;
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-}
-
-static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
-{
- int devid;
-
- if (!info)
- return NULL;
-
- devid = get_devid(info);
- if (devid < 0)
- return NULL;
- return get_irq_domain_for_devid(info, devid);
-}
-
struct irq_remap_ops amd_iommu_irq_ops = {
.prepare = amd_iommu_prepare,
.enable = amd_iommu_enable,
.disable = amd_iommu_disable,
.reenable = amd_iommu_reenable,
.enable_faulting = amd_iommu_enable_faulting,
- .get_irq_domain = get_irq_domain,
};

+static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ struct amd_iommu *iommu;
+ int devid = -1;
+
+ if (x86_fwspec_is_ioapic(fwspec))
+ devid = get_ioapic_devid(fwspec->param[0]);
+ else if (x86_fwspec_is_ioapic(fwspec))
+ devid = get_hpet_devid(fwspec->param[0]);
+
+ if (devid < 0)
+ return 0;
+
+ iommu = amd_iommu_rlookup_table[devid];
+ return iommu && iommu->ir_domain == d;
+}
+
static void irq_remapping_prepare_irte(struct amd_ir_data *data,
struct irq_cfg *irq_cfg,
struct irq_alloc_info *info,
@@ -3813,6 +3797,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
}

static const struct irq_domain_ops amd_ir_domain_ops = {
+ .select = irq_remapping_select,
.alloc = irq_remapping_alloc,
.free = irq_remapping_free,
.activate = irq_remapping_activate,
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 37dd485a5640..e7ed2bb83358 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -61,6 +61,14 @@ static struct irq_chip hyperv_ir_chip = {
.irq_set_affinity = hyperv_ir_set_affinity,
};

+static int hyperv_irq_remapping_select(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ /* Claim only the first (and only) I/OAPIC */
+ return x86_fwspec_is_ioapic(fwspec) && fwspec->param[0] == 0;
+}
+
static int hyperv_irq_remapping_alloc(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs,
void *arg)
@@ -102,6 +110,7 @@ static void hyperv_irq_remapping_free(struct irq_domain *domain,
}

static const struct irq_domain_ops hyperv_ir_domain_ops = {
+ .select = hyperv_irq_remapping_select,
.alloc = hyperv_irq_remapping_alloc,
.free = hyperv_irq_remapping_free,
};
@@ -151,18 +160,9 @@ static int __init hyperv_enable_irq_remapping(void)
return IRQ_REMAP_X2APIC_MODE;
}

-static struct irq_domain *hyperv_get_irq_domain(struct irq_alloc_info *info)
-{
- if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT)
- return ioapic_ir_domain;
- else
- return NULL;
-}
-
struct irq_remap_ops hyperv_irq_remap_ops = {
.prepare = hyperv_prepare_irq_remapping,
.enable = hyperv_enable_irq_remapping,
- .get_irq_domain = hyperv_get_irq_domain,
};

#endif
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 511dfb4884bc..ccf61cd18f69 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1128,29 +1128,12 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
irte->redir_hint = 1;
}

-static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info)
-{
- if (!info)
- return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
- return map_ioapic_to_ir(info->devid);
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
- return map_hpet_to_ir(info->devid);
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-}
-
struct irq_remap_ops intel_irq_remap_ops = {
.prepare = intel_prepare_irq_remapping,
.enable = intel_enable_irq_remapping,
.disable = disable_irq_remapping,
.reenable = reenable_irq_remapping,
.enable_faulting = enable_drhd_fault_handling,
- .get_irq_domain = intel_get_irq_domain,
};

static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
@@ -1435,7 +1418,20 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
modify_irte(&data->irq_2_iommu, &entry);
}

+static int intel_irq_remapping_select(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ if (x86_fwspec_is_ioapic(fwspec))
+ return d == map_ioapic_to_ir(fwspec->param[0]);
+ else if (x86_fwspec_is_hpet(fwspec))
+ return d == map_hpet_to_ir(fwspec->param[0]);
+
+ return 0;
+}
+
static const struct irq_domain_ops intel_ir_domain_ops = {
+ .select = intel_irq_remapping_select,
.alloc = intel_irq_remapping_alloc,
.free = intel_irq_remapping_free,
.activate = intel_irq_remapping_activate,
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2d84b1ed205e..83314b9d8f38 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -158,17 +158,3 @@ void panic_if_irq_remap(const char *msg)
if (irq_remapping_enabled)
panic(msg);
}
-
-/**
- * irq_remapping_get_irq_domain - Get the irqdomain serving the request @info
- * @info: interrupt allocation information, used to identify the IOMMU device
- *
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *irq_remapping_get_irq_domain(struct irq_alloc_info *info)
-{
- if (!remap_ops || !remap_ops->get_irq_domain)
- return NULL;
-
- return remap_ops->get_irq_domain(info);
-}
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 1661b3d75920..8c89cb947cdb 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -42,9 +42,6 @@ struct irq_remap_ops {

/* Enable fault handling */
int (*enable_faulting)(void);
-
- /* Get the irqdomain associated to IOMMU device */
- struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
};

extern struct irq_remap_ops intel_irq_remap_ops;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 76cd7ebd1178..6440f97c412e 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -42,7 +42,16 @@ static inline void debugfs_add_domain_dir(struct irq_domain *d) { }
static inline void debugfs_remove_domain_dir(struct irq_domain *d) { }
#endif

-const struct fwnode_operations irqchip_fwnode_ops;
+static const char *irqchip_fwnode_get_name(const struct fwnode_handle *fwnode)
+{
+ struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+
+ return fwid->name;
+}
+
+const struct fwnode_operations irqchip_fwnode_ops = {
+ .get_name = irqchip_fwnode_get_name,
+};
EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);

/**

Attachment: smime.p7s
Description: S/MIME cryptographic signature