RE: [EXTERNAL] Re: [PATCH v3 1/2] PCI: hv: Make the code arch neutral by adding arch specific interfaces
From: Sunil Muthuswamy
Date: Tue Nov 09 2021 - 14:38:59 EST
On Sunday, October 24, 2021 5:17 AM,
Marc Zyngier <maz@xxxxxxxxxx> wrote:
> > From: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx>
> >
> > Encapsulate arch dependencies in Hyper-V vPCI through a set of interfaces,
> > listed below. Adding these arch specific interfaces will allow for an
> > implementation for other arch, such as ARM64.
> >
> > Implement the interfaces for X64, which is essentially just moving over the
> > current implementation.
>
> Nit: use architecture names and capitalisation that match their use in
> the kernel (arm64, x86) instead of the MS-specific lingo.
Thanks, will fix in v4.
> > +
> > +#ifdef CONFIG_X86_64
> > +int hv_pci_irqchip_init(struct irq_domain **parent_domain,
> > + bool *fasteoi_handler,
> > + u8 *delivery_mode)
> > +{
> > + *parent_domain = x86_vector_domain;
> > + *fasteoi_handler = false;
> > + *delivery_mode = APIC_DELIVERY_MODE_FIXED;
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(hv_pci_irqchip_init);
>
> Why do you need to export any of these symbols? Even if the two
> objects are compiled separately, there is absolutely no need to make
> them two separate modules.
>
> Also, returning 3 values like this makes little sense. Pass a pointer
> to the structure that requires them and populate it as required. Or
> simply #define those that are constants.
Thanks. In v4, I am moving everything back to pci-hyperv.c and this
will get addressed as part of that.
> > +
> > +void hv_pci_irqchip_free(void) {}
> > +EXPORT_SYMBOL(hv_pci_irqchip_free);
> > +
> > +unsigned int hv_msi_get_int_vector(struct irq_data *data)
> > +{
> > + struct irq_cfg *cfg = irqd_cfg(data);
> > +
> > + return cfg->vector;
> > +}
> > +EXPORT_SYMBOL(hv_msi_get_int_vector);
> > +
> > +void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
> > + struct msi_desc *msi_desc)
> > +{
> > + msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
> > + msi_entry->data.as_uint32 = msi_desc->msg.data;
> > +}
> > +EXPORT_SYMBOL(hv_set_msi_entry_from_desc);
> > +
> > +int hv_msi_prepare(struct irq_domain *domain, struct device *dev,
> > + int nvec, msi_alloc_info_t *info)
> > +{
> > + return pci_msi_prepare(domain, dev, nvec, info);
> > +}
> > +EXPORT_SYMBOL(hv_msi_prepare);
>
> This looks like a very unnecessary level of indirection, given that
> you end-up with an empty callback in the arm64 code. The following
> works just as well and avoids useless callbacks:
>
> #ifdef CONFIG_ARM64
> #define pci_msi_prepare NULL
> #endif
Will get addressed in v4.
> >
> > +static struct irq_domain *parent_domain;
> > +static bool fasteoi;
> > +static u8 delivery_mode;
>
> See my earlier comment about how clumsy this is.
Thanks. Getting fixed in v4 as part of moving things back to pci-hyperv.c
> > /*
> > - * Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED
> by
> > - * setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results
> in a
> > + * For x64, honoring apic->delivery_mode set to
> > + * APIC_DELIVERY_MODE_FIXED by setting the
> > + * HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
> > * spurious interrupt storm. Not doing so does not seem to have a
> > * negative effect (yet?).
>
> And what does it mean on other architectures?
The same applies to other architectures. Will address the comment update
In v4.
> > */
> > @@ -1347,7 +1349,7 @@ static u32 hv_compose_msi_req_v1(
> > int_pkt->wslot.slot = slot;
> > int_pkt->int_desc.vector = vector;
> > int_pkt->int_desc.vector_count = 1;
> > - int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
> > + int_pkt->int_desc.delivery_mode = delivery_mode;
> >
> > /*
> > * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget
> in
> > @@ -1377,7 +1379,7 @@ static u32 hv_compose_msi_req_v2(
> > int_pkt->wslot.slot = slot;
> > int_pkt->int_desc.vector = vector;
> > int_pkt->int_desc.vector_count = 1;
> > - int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
> > + int_pkt->int_desc.delivery_mode = delivery_mode;
> > cpu = hv_compose_msi_req_get_cpu(affinity);
> > int_pkt->int_desc.processor_array[0] =
> > hv_cpu_number_to_vp_number(cpu);
> > @@ -1397,7 +1399,7 @@ static u32 hv_compose_msi_req_v3(
> > int_pkt->int_desc.vector = vector;
> > int_pkt->int_desc.reserved = 0;
> > int_pkt->int_desc.vector_count = 1;
> > - int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
> > + int_pkt->int_desc.delivery_mode = delivery_mode;
> > cpu = hv_compose_msi_req_get_cpu(affinity);
> > int_pkt->int_desc.processor_array[0] =
> > hv_cpu_number_to_vp_number(cpu);
> > @@ -1419,7 +1421,6 @@ static u32 hv_compose_msi_req_v3(
> > */
> > static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
> > {
> > - struct irq_cfg *cfg = irqd_cfg(data);
> > struct hv_pcibus_device *hbus;
> > struct vmbus_channel *channel;
> > struct hv_pci_dev *hpdev;
> > @@ -1470,7 +1471,7 @@ static void hv_compose_msi_msg(struct irq_data
> *data, struct msi_msg *msg)
> > size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
> > dest,
> > hpdev->desc.win_slot.slot,
> > - cfg->vector);
> > + hv_msi_get_int_vector(data));
> > break;
> >
> > case PCI_PROTOCOL_VERSION_1_2:
> > @@ -1478,14 +1479,14 @@ static void hv_compose_msi_msg(struct irq_data
> *data, struct msi_msg *msg)
> > size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
> > dest,
> > hpdev->desc.win_slot.slot,
> > - cfg->vector);
> > + hv_msi_get_int_vector(data));
> > break;
> >
> > case PCI_PROTOCOL_VERSION_1_4:
> > size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
> > dest,
> > hpdev->desc.win_slot.slot,
> > - cfg->vector);
> > + hv_msi_get_int_vector(data));
> > break;
> >
> > default:
> > @@ -1601,7 +1602,7 @@ static struct irq_chip hv_msi_irq_chip = {
> > };
> >
> > static struct msi_domain_ops hv_msi_ops = {
> > - .msi_prepare = pci_msi_prepare,
> > + .msi_prepare = hv_msi_prepare,
> > .msi_free = hv_msi_free,
> > };
> >
> > @@ -1625,12 +1626,13 @@ static int hv_pcie_init_irq_domain(struct
> hv_pcibus_device *hbus)
> > hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
> > MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
> > MSI_FLAG_PCI_MSIX);
> > - hbus->msi_info.handler = handle_edge_irq;
> > - hbus->msi_info.handler_name = "edge";
> > + hbus->msi_info.handler =
> > + fasteoi ? handle_fasteoi_irq : handle_edge_irq;
> > + hbus->msi_info.handler_name = fasteoi ? "fasteoi" : "edge";
>
> The fact that you somehow need to know what the GIC is using as a flow
> handler is a sure sign that you are doing something wrong. In a
> hierarchical setup, only the root of the hierarchy should ever know
> about that. Having anything there is actively wrong.
Thanks, comments below.
> > hbus->msi_info.data = hbus;
> > hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
> > &hbus->msi_info,
> > - x86_vector_domain);
> > + parent_domain);
> > if (!hbus->irq_domain) {
> > dev_err(&hbus->hdev->device,
> > "Failed to build an MSI IRQ domain\n");
> > @@ -3531,13 +3533,21 @@ static void __exit exit_hv_pci_drv(void)
> > hvpci_block_ops.read_block = NULL;
> > hvpci_block_ops.write_block = NULL;
> > hvpci_block_ops.reg_blk_invalidate = NULL;
> > +
> > + hv_pci_irqchip_free();
> > }
> >
> > static int __init init_hv_pci_drv(void)
> > {
> > + int ret;
> > +
> > if (!hv_is_hyperv_initialized())
> > return -ENODEV;
> >
> > + ret = hv_pci_irqchip_init(&parent_domain, &fasteoi, &delivery_mode);
> > + if (ret)
> > + return ret;
>
> Having established that the fasteoi thing is nothing but a bug, that
> the delivery_mode is a constant, and that all that matters is actually
> the parent domain which is a global pointer on x86, and something that
> gets allocated on arm64, you can greatly simplify the whole thing:
>
> #ifdef CONFIG_X86
> #define DELIVERY_MODE APIC_DELIVERY_MODE_FIXED
> #define FLOW_HANDLER handle_edge_irq
> #define FLOW_NAME "edge"
>
> static struct irq_domain *hv_pci_get_root_domain(void)
> {
> return x86_vector_domain;
> }
> #endif
>
> #ifdef CONFIG_ARM64
> #define DELIVERY_MODE 0
> #define FLOW_HANDLER NULL
> #define FLOW_NAME NULL
> #define pci_msi_prepare NULL
>
> static struct irq_domain *hv_pci_get_root_domain(void)
> {
> [...]
> }
> #endif
Thanks. I have followed the above suggestion in v4.
> as once you look at it seriously, the whole "separate file for the IRQ
> code" is totally unnecessary (as Michael pointed out earlier), because
> the abstractions you are adding are for most of them unnecessary.
V4 will get rid of the separate file for the IRQ chip and that's getting
moved back to pci-hyperv.c and that should address this comment.
Thanks.
- Sunil