RE: [PATCH RFC v2 02/18] irq/dev-msi: Add support for a new DEV_MSI irq domain

From: Dey, Megha
Date: Wed Aug 05 2020 - 15:00:39 EST


Hi Marc,

> -----Original Message-----
> From: Marc Zyngier <maz@xxxxxxxxxx>
> Sent: Wednesday, July 22, 2020 11:53 AM
> To: Jiang, Dave <dave.jiang@xxxxxxxxx>
> Cc: vkoul@xxxxxxxxxx; Dey, Megha <megha.dey@xxxxxxxxx>;
> bhelgaas@xxxxxxxxxx; rafael@xxxxxxxxxx; gregkh@xxxxxxxxxxxxxxxxxxx;
> tglx@xxxxxxxxxxxxx; hpa@xxxxxxxxx; alex.williamson@xxxxxxxxxx; Pan, Jacob
> jun <jacob.jun.pan@xxxxxxxxx>; Raj, Ashok <ashok.raj@xxxxxxxxx>;
> jgg@xxxxxxxxxxxx; Liu, Yi L <yi.l.liu@xxxxxxxxx>; Lu, Baolu
> <baolu.lu@xxxxxxxxx>; Tian, Kevin <kevin.tian@xxxxxxxxx>; Kumar, Sanjay K
> <sanjay.k.kumar@xxxxxxxxx>; Luck, Tony <tony.luck@xxxxxxxxx>; Lin, Jing
> <jing.lin@xxxxxxxxx>; Williams, Dan J <dan.j.williams@xxxxxxxxx>;
> kwankhede@xxxxxxxxxx; eric.auger@xxxxxxxxxx; parav@xxxxxxxxxxxx;
> Hansen, Dave <dave.hansen@xxxxxxxxx>; netanelg@xxxxxxxxxxxx;
> shahafs@xxxxxxxxxxxx; yan.y.zhao@xxxxxxxxxxxxxxx; pbonzini@xxxxxxxxxx;
> Ortiz, Samuel <samuel.ortiz@xxxxxxxxx>; Hossain, Mona
> <mona.hossain@xxxxxxxxx>; dmaengine@xxxxxxxxxxxxxxx; linux-
> kernel@xxxxxxxxxxxxxxx; x86@xxxxxxxxxx; linux-pci@xxxxxxxxxxxxxxx;
> kvm@xxxxxxxxxxxxxxx
> Subject: Re: [PATCH RFC v2 02/18] irq/dev-msi: Add support for a new DEV_MSI
> irq domain
>
> On Tue, 21 Jul 2020 17:02:28 +0100,
> Dave Jiang <dave.jiang@xxxxxxxxx> wrote:
> >
> > From: Megha Dey <megha.dey@xxxxxxxxx>
> >
> > Add support for the creation of a new DEV_MSI irq domain. It creates a
> > new irq chip associated with the DEV_MSI domain and adds the necessary
> > domain operations to it.
> >
> > Add a new config option DEV_MSI which must be enabled by any driver
> > that wants to support device-specific message-signaled-interrupts
> > outside of PCI-MSI(-X).
>
> Which is exactly what platform-MSI already does. Why do we need something
> else?

True, dev-msi is merely an extension of platform-msi: apart from a custom
write_msg callback, it also provides custom mask/unmask callbacks for the device.
Also, we introduce a new IRQ domain to be associated with these classes of devices.
There is nothing more to dev-msi than this currently.

>
> >
> > Lastly, add device specific mask/unmask callbacks in addition to a
> > write function to the platform_msi_ops.
> >
> > Reviewed-by: Dan Williams <dan.j.williams@xxxxxxxxx>
> > Signed-off-by: Megha Dey <megha.dey@xxxxxxxxx>
> > Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
> > ---
> > arch/x86/include/asm/hw_irq.h | 5 ++
> > drivers/base/Kconfig | 7 +++
> > drivers/base/Makefile | 1
> > drivers/base/dev-msi.c | 95
> +++++++++++++++++++++++++++++++++++++++++
> > drivers/base/platform-msi.c | 45 +++++++++++++------
> > drivers/base/platform-msi.h | 23 ++++++++++
> > include/linux/msi.h | 8 +++
> > 7 files changed, 168 insertions(+), 16 deletions(-) create mode
> > 100644 drivers/base/dev-msi.c create mode 100644
> > drivers/base/platform-msi.h
> >
> > diff --git a/arch/x86/include/asm/hw_irq.h
> > b/arch/x86/include/asm/hw_irq.h index 74c12437401e..8ecd7570589d
> > 100644
> > --- a/arch/x86/include/asm/hw_irq.h
> > +++ b/arch/x86/include/asm/hw_irq.h
> > @@ -61,6 +61,11 @@ struct irq_alloc_info {
> > irq_hw_number_t msi_hwirq;
> > };
> > #endif
> > +#ifdef CONFIG_DEV_MSI
> > + struct {
> > + irq_hw_number_t hwirq;
> > + };
> > +#endif
> > #ifdef CONFIG_X86_IO_APIC
> > struct {
> > int ioapic_id;
> > diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index
> > 8d7001712062..f00901bac056 100644
> > --- a/drivers/base/Kconfig
> > +++ b/drivers/base/Kconfig
> > @@ -210,4 +210,11 @@ config GENERIC_ARCH_TOPOLOGY
> > appropriate scaling, sysfs interface for reading capacity values at
> > runtime.
> >
> > +config DEV_MSI
> > + bool "Device Specific Interrupt Messages"
> > + select IRQ_DOMAIN_HIERARCHY
> > + select GENERIC_MSI_IRQ_DOMAIN
> > + help
> > + Allow device drivers to generate device-specific interrupt messages
> > + for devices independent of PCI MSI/-X.
> > endmenu
> > diff --git a/drivers/base/Makefile b/drivers/base/Makefile index
> > 157452080f3d..ca1e4d39164e 100644
> > --- a/drivers/base/Makefile
> > +++ b/drivers/base/Makefile
> > @@ -21,6 +21,7 @@ obj-$(CONFIG_REGMAP) += regmap/
> > obj-$(CONFIG_SOC_BUS) += soc.o
> > obj-$(CONFIG_PINCTRL) += pinctrl.o
> > obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
> > +obj-$(CONFIG_DEV_MSI) += dev-msi.o
> > obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o
> > obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o
> >
> > diff --git a/drivers/base/dev-msi.c b/drivers/base/dev-msi.c new file
> > mode 100644 index 000000000000..240ccc353933
> > --- /dev/null
> > +++ b/drivers/base/dev-msi.c
> > @@ -0,0 +1,95 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright © 2020 Intel Corporation.
> > + *
> > + * Author: Megha Dey <megha.dey@xxxxxxxxx> */
> > +
> > +#include <linux/irq.h>
> > +#include <linux/irqdomain.h>
> > +#include <linux/msi.h>
> > +#include "platform-msi.h"
> > +
> > +struct irq_domain *dev_msi_default_domain;
> > +
> > +static irq_hw_number_t dev_msi_get_hwirq(struct msi_domain_info *info,
> > + msi_alloc_info_t *arg)
> > +{
> > + return arg->hwirq;
> > +}
> > +
> > +static irq_hw_number_t dev_msi_calc_hwirq(struct msi_desc *desc) {
> > + u32 devid;
> > +
> > + devid = desc->platform.msi_priv_data->devid;
> > +
> > + return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index; }
> > +
> > +static void dev_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc
> > +*desc) {
> > + arg->hwirq = dev_msi_calc_hwirq(desc); }
> > +
> > +static int dev_msi_prepare(struct irq_domain *domain, struct device *dev,
> > + int nvec, msi_alloc_info_t *arg) {
> > + memset(arg, 0, sizeof(*arg));
> > +
> > + return 0;
> > +}
> > +
> > +static struct msi_domain_ops dev_msi_domain_ops = {
> > + .get_hwirq = dev_msi_get_hwirq,
> > + .set_desc = dev_msi_set_desc,
> > + .msi_prepare = dev_msi_prepare,
> > +};
> > +
> > +static struct irq_chip dev_msi_controller = {
> > + .name = "DEV-MSI",
> > + .irq_unmask = platform_msi_unmask_irq,
> > + .irq_mask = platform_msi_mask_irq,
>
> This seems pretty odd, see below.

Ok..
>
> > + .irq_write_msi_msg = platform_msi_write_msg,
> > + .irq_ack = irq_chip_ack_parent,
> > + .irq_retrigger = irq_chip_retrigger_hierarchy,
> > + .flags = IRQCHIP_SKIP_SET_WAKE,
> > +};
> > +
> > +static struct msi_domain_info dev_msi_domain_info = {
> > + .flags = MSI_FLAG_USE_DEF_DOM_OPS |
> MSI_FLAG_USE_DEF_CHIP_OPS,
> > + .ops = &dev_msi_domain_ops,
> > + .chip = &dev_msi_controller,
> > + .handler = handle_edge_irq,
> > + .handler_name = "edge",
> > +};
> > +
> > +static int __init create_dev_msi_domain(void) {
> > + struct irq_domain *parent = NULL;
> > + struct fwnode_handle *fn;
> > +
> > + /*
> > + * Modern code should never have to use irq_get_default_host. But
> since
> > + * dev-msi is invisible to DT/ACPI, this is an exception case.
> > + */
> > + parent = irq_get_default_host();
>
> Really? How is it going to work once you have devices sending their MSIs to two
> different downstream blocks? This looks rather short-sighted.

So after some thought, I've realized that we don't need to introduce two IRQ domains
(one with and one without interrupt remapping enabled).
Hence, the above no longer applies in the next version of the patches.
>
> > + if (!parent)
> > + return -ENXIO;
> > +
> > + fn = irq_domain_alloc_named_fwnode("DEV_MSI");
> > + if (!fn)
> > + return -ENXIO;
> > +
> > + dev_msi_default_domain = msi_create_irq_domain(fn,
> &dev_msi_domain_info, parent);
> > + if (!dev_msi_default_domain) {
> > + pr_warn("failed to initialize irqdomain for DEV-MSI.\n");
> > + return -ENXIO;
> > + }
> > +
> > + irq_domain_update_bus_token(dev_msi_default_domain,
> DOMAIN_BUS_PLATFORM_MSI);
> > + irq_domain_free_fwnode(fn);
> > +
> > + return 0;
> > +}
> > +device_initcall(create_dev_msi_domain);
> > diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
> > index 9d94cd699468..5e1f210d65ee 100644
> > --- a/drivers/base/platform-msi.c
> > +++ b/drivers/base/platform-msi.c
> > @@ -12,21 +12,7 @@
> > #include <linux/irqdomain.h>
> > #include <linux/msi.h>
> > #include <linux/slab.h>
> > -
> > -#define DEV_ID_SHIFT 21
> > -#define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT))
> > -
> > -/*
> > - * Internal data structure containing a (made up, but unique) devid
> > - * and the platform-msi ops
> > - */
> > -struct platform_msi_priv_data {
> > - struct device *dev;
> > - void *host_data;
> > - msi_alloc_info_t arg;
> > - const struct platform_msi_ops *ops;
> > - int devid;
> > -};
> > +#include "platform-msi.h"
> >
> > /* The devid allocator */
> > static DEFINE_IDA(platform_msi_devid_ida);
> > @@ -76,7 +62,7 @@ static void platform_msi_update_dom_ops(struct
> msi_domain_info *info)
> > ops->set_desc = platform_msi_set_desc; }
> >
> > -static void platform_msi_write_msg(struct irq_data *data, struct
> > msi_msg *msg)
> > +void platform_msi_write_msg(struct irq_data *data, struct msi_msg
> > +*msg)
>
> It really begs the question: Why are you inventing a whole new "DEV-MSI" when
> this really is platform-MSI?

platform-msi provides MSI setup/control that is platform-custom but opaque to the device driver.
With dev-msi, we add the following:
1. device-specific mask/unmask functions
2. a new dev-msi domain to set up/control MSI on these devices
3. an explicit check denying PCI devices the use of the dev_msi alloc/free calls, something not currently in platform-msi.

We are not really inventing anything new, but only extending platform-msi to cover new groups of devices.
We will be sending out the next version of the patches shortly; please let me know if you have any naming suggestions
for this extension.

>
> > {
> > struct msi_desc *desc = irq_data_get_msi_desc(data);
> > struct platform_msi_priv_data *priv_data; @@ -86,6 +72,33 @@ static
> > void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
> > priv_data->ops->write_msg(desc, msg); }
> >
> > +static void __platform_msi_desc_mask_unmask_irq(struct msi_desc
> > +*desc, u32 mask) {
> > + const struct platform_msi_ops *ops;
> > +
> > + ops = desc->platform.msi_priv_data->ops;
> > + if (!ops)
> > + return;
> > +
> > + if (mask) {
> > + if (ops->irq_mask)
> > + ops->irq_mask(desc);
> > + } else {
> > + if (ops->irq_unmask)
> > + ops->irq_unmask(desc);
> > + }
> > +}
> > +
> > +void platform_msi_mask_irq(struct irq_data *data) {
> > + __platform_msi_desc_mask_unmask_irq(irq_data_get_msi_desc(data),
> 1);
> > +}
> > +
> > +void platform_msi_unmask_irq(struct irq_data *data) {
> > + __platform_msi_desc_mask_unmask_irq(irq_data_get_msi_desc(data),
> 0);
> > +}
> > +
>
> I don't immediately get why you have this code at the platform MSI level. Until
> now, we only had the programming of the message into the end-point, which is
> a device-specific action (and the whole reason why this silly platform MSI exists)
>
> On the other hand, masking an interrupt is an irqchip operation, and only
> concerns the irqchip level. Here, you seem to be making it an end-point
> operation, which doesn't really make sense to me. Or is this device its own
> interrupt controller as well? That would be extremely surprising, and I'd expect
> some block downstream of the device to be able to control the masking of the
> interrupt.

Hmmm, I don't fully understand this. Ultimately, the mask/unmask is a device operation, right?
Some new devices may want the option to mask/unmask interrupts at a non-standard location.
These callbacks are a way for the device to indicate exactly how its interrupts can be masked/unmasked,
no different from PCI mask/unmask, except that this is at a custom location...

>
> > static void platform_msi_update_chip_ops(struct msi_domain_info
> > *info) {
> > struct irq_chip *chip = info->chip;
> > diff --git a/drivers/base/platform-msi.h b/drivers/base/platform-msi.h
> > new file mode 100644 index 000000000000..1de8c2874218
> > --- /dev/null
> > +++ b/drivers/base/platform-msi.h
> > @@ -0,0 +1,23 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright © 2020 Intel Corporation.
> > + *
> > + * Author: Megha Dey <megha.dey@xxxxxxxxx> */
>
> Or not. You are merely moving existing code, not authoring it. Either keep the
> original copyright attribution, or drop this mention altogether.

sure
>
> > +
> > +#include <linux/msi.h>
> > +
> > +#define DEV_ID_SHIFT 21
> > +#define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT))
> > +
> > +/*
> > + * Data structure containing a (made up, but unique) devid
> > + * and the platform-msi ops.
> > + */
> > +struct platform_msi_priv_data {
> > + struct device *dev;
> > + void *host_data;
> > + msi_alloc_info_t arg;
> > + const struct platform_msi_ops *ops;
> > + int devid;
> > +};
> > diff --git a/include/linux/msi.h b/include/linux/msi.h index
> > 7f6a8eb51aca..1da97f905720 100644
> > --- a/include/linux/msi.h
> > +++ b/include/linux/msi.h
> > @@ -323,9 +323,13 @@ enum {
> >
> > /*
> > * platform_msi_ops - Callbacks for platform MSI ops
> > + * @irq_mask: mask an interrupt source
> > + * @irq_unmask: unmask an interrupt source
> > * @write_msg: write message content
> > */
> > struct platform_msi_ops {
> > + unsigned int (*irq_mask)(struct msi_desc *desc);
> > + unsigned int (*irq_unmask)(struct msi_desc *desc);
> > irq_write_msi_msg_t write_msg;
> > };
> >
> > @@ -370,6 +374,10 @@ int platform_msi_domain_alloc(struct irq_domain
> > *domain, unsigned int virq, void platform_msi_domain_free(struct irq_domain
> *domain, unsigned int virq,
> > unsigned int nvec);
> > void *platform_msi_get_host_data(struct irq_domain *domain);
> > +
> > +void platform_msi_write_msg(struct irq_data *data, struct msi_msg
> > +*msg); void platform_msi_unmask_irq(struct irq_data *data); void
> > +platform_msi_mask_irq(struct irq_data *data);
> > #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
> >
> > #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
> >
> >
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.