RE: [RFC Part2 v1 15/21] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

From: Ni, Xun
Date: Thu Sep 11 2014 - 10:17:49 EST


The wording in your commit message is hard to understand: "helps to make the and and architecture" ...

Thanks
Xun

-----Original Message-----
From: linux-pci-owner@xxxxxxxxxxxxxxx [mailto:linux-pci-owner@xxxxxxxxxxxxxxx] On Behalf Of Jiang Liu
Sent: Thursday, September 11, 2014 10:04 PM
To: Benjamin Herrenschmidt; Thomas Gleixner; Ingo Molnar; H. Peter Anvin; Rafael J. Wysocki; Bjorn Helgaas; Randy Dunlap; Yinghai Lu; Borislav Petkov; Grant Likely; Marc Zyngier
Cc: Jiang Liu; Konrad Rzeszutek Wilk; Andrew Morton; Luck, Tony; Joerg Roedel; Greg Kroah-Hartman; x86@xxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; linux-pci@xxxxxxxxxxxxxxx; linux-acpi@xxxxxxxxxxxxxxx; linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
Subject: [RFC Part2 v1 15/21] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

Enhance MSI code to support hierarchy irqdomain, it helps to make the and and architecture more clear.


Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/hw_irq.h | 6 +
arch/x86/include/asm/irq_remapping.h | 6 +-
arch/x86/kernel/apic/msi.c | 225 +++++++++++++++++++++++++++++-----
arch/x86/kernel/apic/vector.c | 2 +
drivers/iommu/irq_remapping.c | 1 -
5 files changed, 204 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 57f81f5a9686..9f705c49f850 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -199,6 +199,12 @@ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {}
#endif /* CONFIG_X86_LOCAL_APIC */

+#ifdef CONFIG_PCI_MSI
+extern void arch_init_msi_domain(struct irq_domain *domain); #else
+static inline void arch_init_msi_domain(struct irq_domain *domain) { }
+#endif
+
/* Statistics */
extern atomic_t irq_err_count;
extern atomic_t irq_mis_count;
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 428b4e6d637c..440053ca7515 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -73,11 +73,7 @@ extern void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p);
* Create MSI/MSIx irqdomain for interrupt remapping device, use @parent as
* parent irqdomain.
*/
-static inline struct irq_domain *
-arch_create_msi_irq_domain(struct irq_domain *parent) -{
- return NULL;
-}
+extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain
+*parent);

/* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 709fedab44f2..5696703271af 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -3,6 +3,8 @@
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
+ * Add support of hierarchy irqdomain
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as @@ -21,6 +23,8 @@ #include <asm/apic.h> #include <asm/irq_remapping.h>

+static struct irq_domain *msi_default_domain;
+
void native_compose_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id) @@ -76,28 +80,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
return 0;
}

-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+static bool msi_remapped(struct irq_domain *domain)
{
- struct irq_cfg *cfg = irqd_cfg(data);
- struct msi_msg msg;
- unsigned int dest;
- int ret;
-
- ret = apic_set_affinity(data, mask, &dest);
- if (ret)
- return ret;
+ return domain->host_data != NULL;
+}

- __get_cached_msi_msg(data->msi_desc, &msg);
+static int msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ struct irq_data *parent = data->parent_data;
+ int ret;

- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ /* No need to reprogram MSI registers if interrupt is remapped */
+ if (ret >= 0 && !msi_remapped(data->domain)) {
+ struct irq_cfg *cfg = irqd_cfg(data);
+ struct msi_msg msg;

- __write_msi_msg(data->msi_desc, &msg);
+ __get_cached_msi_msg(data->msi_desc, &msg);
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(cfg->dest_apicid);
+ __write_msi_msg(data->msi_desc, &msg);
+ }

- return IRQ_SET_MASK_OK_NOCOPY;
+ return ret;
}

/*
@@ -108,9 +116,105 @@ static struct irq_chip msi_chip = {
.name = "PCI-MSI",
.irq_unmask = unmask_msi_irq,
.irq_mask = mask_msi_irq,
- .irq_ack = apic_ack_edge,
+ .irq_ack = irq_chip_ack_parent,
.irq_set_affinity = msi_set_affinity,
- .irq_retrigger = apic_retrigger_irq,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_print_chip = irq_remapping_print_chip,
+};
+
+static inline irq_hw_number_t
+get_hwirq_from_pcidev(struct pci_dev *pdev, struct msi_desc *msidesc) {
+ return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
+ PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
+ (pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27; }
+
+static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg) {
+ int i, ret;
+ irq_hw_number_t hwirq;
+ struct irq_alloc_info *info = arg;
+
+ hwirq = get_hwirq_from_pcidev(info->msi_dev, info->msi_desc);
+ if (irq_find_mapping(domain, hwirq) > 0)
+ return -EEXIST;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_set_msi_desc_off(virq, i, info->msi_desc);
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &msi_chip, (void *)(long)i);
+ __irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
+ dev_dbg(&info->msi_dev->dev, "irq %d for MSI/MSI-X\n",
+ virq + i);
+ }
+
+ return ret;
+}
+
+static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ int i;
+ struct msi_desc *msidesc = irq_get_msi_desc(virq);
+
+ if (msidesc)
+ msidesc->irq = 0;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_set_handler(virq + i, NULL);
+ irq_domain_set_hwirq_and_chip(domain, virq + i, 0, NULL, NULL);
+ }
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs); }
+
+static int msi_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct msi_msg msg;
+ struct irq_cfg *cfg = irqd_cfg(irq_data);
+
+ /*
+ * irq_data->chip_data is MSI/MSIx offset.
+ * MSI-X message is written per-IRQ, the offset is always 0.
+ * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+ */
+ if (irq_data->chip_data)
+ return 0;
+
+ if (msi_remapped(domain))
+ irq_remapping_get_msi_entry(irq_data->parent_data, &msg);
+ else
+ native_compose_msi_msg(NULL, irq_data->irq, cfg->dest_apicid,
+ &msg, 0);
+ write_msi_msg(irq_data->irq, &msg);
+
+ return 0;
+}
+
+static int msi_domain_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct msi_msg msg;
+
+ if (irq_data->chip_data)
+ return 0;
+
+ memset(&msg, 0, sizeof(msg));
+ write_msi_msg(irq_data->irq, &msg);
+
+ return 0;
+}
+
+static struct irq_domain_ops msi_domain_ops = {
+ .alloc = msi_domain_alloc,
+ .free = msi_domain_free,
+ .activate = msi_domain_activate,
+ .deactivate = msi_domain_deactivate,
};

int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, @@ -145,25 +249,56 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,

int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) {
+ int irq, cnt, nvec_pow2;
+ struct irq_domain *domain;
struct msi_desc *msidesc;
- int irq, ret;
+ struct irq_alloc_info info;
+ int node = dev_to_node(&dev->dev);
+
+ if (disable_apic)
+ return -ENOSYS;

- /* Multiple MSI vectors only supported with interrupt remapping */
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
+ init_irq_alloc_info(&info, NULL);
+ info.msi_dev = dev;
+ if (type == PCI_CAP_ID_MSI) {
+ msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+ WARN_ON(!list_is_singular(&dev->msi_list));
+ WARN_ON(msidesc->irq);
+ WARN_ON(msidesc->msi_attrib.multiple);
+ WARN_ON(msidesc->nvec_used);
+ info.type = X86_IRQ_ALLOC_TYPE_MSI;
+ cnt = nvec;
+ } else {
+ info.type = X86_IRQ_ALLOC_TYPE_MSIX;
+ cnt = 1;
+ }
+
+ domain = irq_remapping_get_irq_domain(&info);
+ if (domain == NULL) {
+ /*
+ * Multiple MSI vectors only supported with interrupt
+ * remapping
+ */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+ domain = msi_default_domain;
+ }
+ if (domain == NULL)
+ return -ENOSYS;

list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = irq_domain_alloc_irqs(NULL, -1, 1, NUMA_NO_NODE, NULL);
+ info.msi_desc = msidesc;
+ irq = irq_domain_alloc_irqs(domain, -1, cnt, node, &info);
if (irq <= 0)
return -ENOSPC;
+ }

- ret = setup_msi_irq(dev, msidesc, irq, 0);
- if (ret < 0) {
- irq_domain_free_irqs(irq, 1);
- return ret;
- }
-
+ if (type == PCI_CAP_ID_MSI) {
+ nvec_pow2 = __roundup_pow_of_two(nvec);
+ msidesc->msi_attrib.multiple = ilog2(nvec_pow2);
+ msidesc->nvec_used = nvec;
}
+
return 0;
}

@@ -172,6 +307,36 @@ void native_teardown_msi_irq(unsigned int irq)
irq_domain_free_irqs(irq, 1);
}

+static struct irq_domain *msi_create_domain(struct irq_domain *parent,
+ int remapped)
+{
+ struct irq_domain *domain;
+
+ domain = irq_domain_add_tree(NULL, &msi_domain_ops,
+ (void *)(long)remapped);
+ if (domain)
+ domain->parent = parent;
+
+ return domain;
+}
+
+void arch_init_msi_domain(struct irq_domain *parent) {
+ if (disable_apic)
+ return;
+
+ msi_default_domain = msi_create_domain(parent, 0);
+ if (!msi_default_domain)
+ pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); }
+
+#ifdef CONFIG_IRQ_REMAP
+struct irq_domain *arch_create_msi_irq_domain(struct irq_domain
+*parent) {
+ return msi_create_domain(parent, 1);
+}
+#endif
+
#ifdef CONFIG_DMAR_TABLE
static int
dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 774ab5ba95f2..e9329fc28c63 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -357,6 +357,8 @@ int __init arch_early_irq_init(void)
BUG_ON(x86_vector_domain == NULL);
irq_set_default_host(x86_vector_domain);

+ arch_init_msi_domain(x86_vector_domain);
+
return arch_early_ioapic_init();
}

diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 7ac44a464be0..bda0d8e73fde 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -178,7 +178,6 @@ static void __init irq_remapping_modify_x86_ops(void)
x86_io_apic_ops.set_affinity = set_remapped_irq_affinity;
x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry;
x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped;
- x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs;
x86_msi.setup_hpet_msi = setup_hpet_msi_remapped;
x86_msi.compose_msi_msg = compose_remapped_msi_msg;
}
--
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/