Re: [Patch Part2 v6 22/27] x86, uv: Use hierarchy irqdomain to manage UV interrupts

From: Russ Anderson
Date: Wed Dec 17 2014 - 11:48:31 EST


On Wed, Dec 17, 2014 at 10:41:51AM +0800, Jiang Liu wrote:
> On 2014/12/17 1:29, Dimitri Sivanich wrote:
> > I answered my own question, this had never been tested on UV.
> >
> > The gru driver fails with:
> > SGI GRU Device Driver: uv_setup_irq failed, errno=22
> >
> > The info->type in uv_domain_alloc() is not set to X86_IRQ_ALLOC_TYPE_UV
> > (info->type is never set to that value anywhere).
> >
> > Adding the following to uv_setup_irq allows it to work:
> >
> > --- linux.orig/arch/x86/platform/uv/uv_irq.c
> > +++ linux/arch/x86/platform/uv/uv_irq.c
> > @@ -187,6 +187,7 @@ int uv_setup_irq(char *irq_name, int cpu
> > return -ENOMEM;
> >
> > init_irq_alloc_info(&info, cpumask_of(cpu));
> > + info.type = X86_IRQ_ALLOC_TYPE_UV;
> > info.uv_limit = limit;
> > info.uv_blade = mmr_blade;
> > info.uv_offset = mmr_offset;
> >
> > On Mon, Dec 15, 2014 at 03:37:35PM -0600, Dimitri Sivanich wrote:
> Hi Dimitri,
> Thanks for reporting and fixing this bug. We will rebase the
> tip/x86/apic branch and fold the above patch into the original patch.
> May I assume a Tested-by from you? I have no UV systems for testing.

287 lines of change in uv_irq.c completely untested in linux-next? Ouch.

Dimitri tested the one line change above, because the driver would
not even load without it. It will take some time to look through
your extensive changes to understand and verify it works.

Thanks.

> Regards!
> Gerry
>
> >> Was this patch ever tested on a UV system?
> >>
> >> Also, adding some SGI folks to the CC list, since there were none listed before.
> >>
> >> On Tue, Nov 25, 2014 at 01:53:31PM +0800, Jiang Liu wrote:
> >>> Enhance UV code to support hierarchy irqdomain, it helps to make
> >>> the architecture more clear.
> >>>
> >>> We should construct hwirq based on mmr_blade and mmr_offset, but
> >>> mmr_offset is type of unsigned long, it may exceed the range of
> >>> irq_hw_number_t. So help about the way to construct hwirq based
> >>> on mmr_blade and mmr_offset is welcomed!
> >>>
> >>> Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx>
> >>> ---
> >>> arch/x86/include/asm/hw_irq.h | 9 ++
> >>> arch/x86/platform/uv/uv_irq.c | 287 ++++++++++++++++-------------------------
> >>> 2 files changed, 117 insertions(+), 179 deletions(-)
> >>>
> >>> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> >>> index 46dec7e37829..bbf90fe2a224 100644
> >>> --- a/arch/x86/include/asm/hw_irq.h
> >>> +++ b/arch/x86/include/asm/hw_irq.h
> >>> @@ -123,6 +123,7 @@ enum irq_alloc_type {
> >>> X86_IRQ_ALLOC_TYPE_MSI,
> >>> X86_IRQ_ALLOC_TYPE_MSIX,
> >>> X86_IRQ_ALLOC_TYPE_DMAR,
> >>> + X86_IRQ_ALLOC_TYPE_UV,
> >>> };
> >>>
> >>> struct irq_alloc_info {
> >>> @@ -169,6 +170,14 @@ struct irq_alloc_info {
> >>> void *ht_update;
> >>> };
> >>> #endif
> >>> +#ifdef CONFIG_X86_UV
> >>> + struct {
> >>> + int uv_limit;
> >>> + int uv_blade;
> >>> + unsigned long uv_offset;
> >>> + char *uv_name;
> >>> + };
> >>> +#endif
> >>> };
> >>> };
> >>>
> >>> diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
> >>> index 474912d03f40..c237ed34a498 100644
> >>> --- a/arch/x86/platform/uv/uv_irq.c
> >>> +++ b/arch/x86/platform/uv/uv_irq.c
> >>> @@ -19,17 +19,31 @@
> >>> #include <asm/uv/uv_hub.h>
> >>>
> >>> /* MMR offset and pnode of hub sourcing interrupts for a given irq */
> >>> -struct uv_irq_2_mmr_pnode{
> >>> - struct rb_node list;
> >>> +struct uv_irq_2_mmr_pnode {
> >>> unsigned long offset;
> >>> int pnode;
> >>> - int irq;
> >>> };
> >>>
> >>> -static DEFINE_SPINLOCK(uv_irq_lock);
> >>> -static struct rb_root uv_irq_root;
> >>> +static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info)
> >>> +{
> >>> + unsigned long mmr_value;
> >>> + struct uv_IO_APIC_route_entry *entry;
> >>> +
> >>> + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
> >>> + sizeof(unsigned long));
> >>> +
> >>> + mmr_value = 0;
> >>> + entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
> >>> + entry->vector = cfg->vector;
> >>> + entry->delivery_mode = apic->irq_delivery_mode;
> >>> + entry->dest_mode = apic->irq_dest_mode;
> >>> + entry->polarity = 0;
> >>> + entry->trigger = 0;
> >>> + entry->mask = 0;
> >>> + entry->dest = cfg->dest_apicid;
> >>>
> >>> -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
> >>> + uv_write_global_mmr64(info->pnode, info->offset, mmr_value);
> >>> +}
> >>>
> >>> static void uv_noop(struct irq_data *data) { }
> >>>
> >>> @@ -38,6 +52,24 @@ static void uv_ack_apic(struct irq_data *data)
> >>> ack_APIC_irq();
> >>> }
> >>>
> >>> +static int
> >>> +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
> >>> + bool force)
> >>> +{
> >>> + struct irq_data *parent = data->parent_data;
> >>> + struct irq_cfg *cfg = irqd_cfg(data);
> >>> + int ret;
> >>> +
> >>> + ret = parent->chip->irq_set_affinity(parent, mask, force);
> >>> + if (ret >= 0) {
> >>> + uv_program_mmr(cfg, data->chip_data);
> >>> + if (cfg->move_in_progress)
> >>> + send_cleanup_vector(cfg);
> >>> + }
> >>> +
> >>> + return ret;
> >>> +}
> >>> +
> >>> static struct irq_chip uv_irq_chip = {
> >>> .name = "UV-CORE",
> >>> .irq_mask = uv_noop,
> >>> @@ -46,179 +78,99 @@ static struct irq_chip uv_irq_chip = {
> >>> .irq_set_affinity = uv_set_irq_affinity,
> >>> };
> >>>
> >>> -/*
> >>> - * Add offset and pnode information of the hub sourcing interrupts to the
> >>> - * rb tree for a specific irq.
> >>> - */
> >>> -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
> >>> +static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq,
> >>> + unsigned int nr_irqs, void *arg)
> >>> {
> >>> - struct rb_node **link = &uv_irq_root.rb_node;
> >>> - struct rb_node *parent = NULL;
> >>> - struct uv_irq_2_mmr_pnode *n;
> >>> - struct uv_irq_2_mmr_pnode *e;
> >>> - unsigned long irqflags;
> >>> -
> >>> - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
> >>> - uv_blade_to_memory_nid(blade));
> >>> - if (!n)
> >>> + struct uv_irq_2_mmr_pnode *chip_data;
> >>> + struct irq_alloc_info *info = arg;
> >>> + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
> >>> + int ret;
> >>> +
> >>> + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV)
> >>> + return -EINVAL;
> >>> +
> >>> + chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL,
> >>> + irq_data->node);
> >>> + if (!chip_data)
> >>> return -ENOMEM;
> >>>
> >>> - n->irq = irq;
> >>> - n->offset = offset;
> >>> - n->pnode = uv_blade_to_pnode(blade);
> >>> - spin_lock_irqsave(&uv_irq_lock, irqflags);
> >>> - /* Find the right place in the rbtree: */
> >>> - while (*link) {
> >>> - parent = *link;
> >>> - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
> >>> -
> >>> - if (unlikely(irq == e->irq)) {
> >>> - /* irq entry exists */
> >>> - e->pnode = uv_blade_to_pnode(blade);
> >>> - e->offset = offset;
> >>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags);
> >>> - kfree(n);
> >>> - return 0;
> >>> - }
> >>> -
> >>> - if (irq < e->irq)
> >>> - link = &(*link)->rb_left;
> >>> + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
> >>> + if (ret >= 0) {
> >>> + if (info->uv_limit == UV_AFFINITY_CPU)
> >>> + irq_set_status_flags(virq, IRQ_NO_BALANCING);
> >>> else
> >>> - link = &(*link)->rb_right;
> >>> + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
> >>> +
> >>> + chip_data->pnode = uv_blade_to_pnode(info->uv_blade);
> >>> + chip_data->offset = info->uv_offset;
> >>> + irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data,
> >>> + handle_percpu_irq, NULL, info->uv_name);
> >>> + } else {
> >>> + kfree(chip_data);
> >>> }
> >>>
> >>> - /* Insert the node into the rbtree. */
> >>> - rb_link_node(&n->list, parent, link);
> >>> - rb_insert_color(&n->list, &uv_irq_root);
> >>> -
> >>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags);
> >>> - return 0;
> >>> + return ret;
> >>> }
> >>>
> >>> -/* Retrieve offset and pnode information from the rb tree for a specific irq */
> >>> -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
> >>> +static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
> >>> + unsigned int nr_irqs)
> >>> {
> >>> - struct uv_irq_2_mmr_pnode *e;
> >>> - struct rb_node *n;
> >>> - unsigned long irqflags;
> >>> -
> >>> - spin_lock_irqsave(&uv_irq_lock, irqflags);
> >>> - n = uv_irq_root.rb_node;
> >>> - while (n) {
> >>> - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
> >>> -
> >>> - if (e->irq == irq) {
> >>> - *offset = e->offset;
> >>> - *pnode = e->pnode;
> >>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags);
> >>> - return 0;
> >>> - }
> >>> -
> >>> - if (irq < e->irq)
> >>> - n = n->rb_left;
> >>> - else
> >>> - n = n->rb_right;
> >>> - }
> >>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags);
> >>> - return -1;
> >>> + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
> >>> +
> >>> + BUG_ON(nr_irqs != 1);
> >>> + kfree(irq_data->chip_data);
> >>> + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
> >>> + irq_clear_status_flags(virq, IRQ_NO_BALANCING);
> >>> + irq_domain_free_irqs_top(domain, virq, nr_irqs);
> >>> }
> >>>
> >>> /*
> >>> * Re-target the irq to the specified CPU and enable the specified MMR located
> >>> * on the specified blade to allow the sending of MSIs to the specified CPU.
> >>> */
> >>> -static int
> >>> -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
> >>> - unsigned long mmr_offset, int limit)
> >>> +static void uv_domain_activate(struct irq_domain *domain,
> >>> + struct irq_data *irq_data)
> >>> {
> >>> - struct irq_cfg *cfg = irq_cfg(irq);
> >>> - unsigned long mmr_value;
> >>> - struct uv_IO_APIC_route_entry *entry;
> >>> - int mmr_pnode;
> >>> -
> >>> - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
> >>> - sizeof(unsigned long));
> >>> -
> >>> - if (limit == UV_AFFINITY_CPU)
> >>> - irq_set_status_flags(irq, IRQ_NO_BALANCING);
> >>> - else
> >>> - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
> >>> -
> >>> - irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
> >>> - irq_name);
> >>> -
> >>> - mmr_value = 0;
> >>> - entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
> >>> - entry->vector = cfg->vector;
> >>> - entry->delivery_mode = apic->irq_delivery_mode;
> >>> - entry->dest_mode = apic->irq_dest_mode;
> >>> - entry->polarity = 0;
> >>> - entry->trigger = 0;
> >>> - entry->mask = 0;
> >>> - entry->dest = cfg->dest_apicid;
> >>> -
> >>> - mmr_pnode = uv_blade_to_pnode(mmr_blade);
> >>> - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
> >>> -
> >>> - if (cfg->move_in_progress)
> >>> - send_cleanup_vector(cfg);
> >>> -
> >>> - return irq;
> >>> + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
> >>> }
> >>>
> >>> /*
> >>> * Disable the specified MMR located on the specified blade so that MSIs are
> >>> * longer allowed to be sent.
> >>> */
> >>> -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
> >>> +static void uv_domain_deactivate(struct irq_domain *domain,
> >>> + struct irq_data *irq_data)
> >>> {
> >>> unsigned long mmr_value;
> >>> struct uv_IO_APIC_route_entry *entry;
> >>>
> >>> - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
> >>> - sizeof(unsigned long));
> >>> -
> >>> mmr_value = 0;
> >>> entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
> >>> entry->mask = 1;
> >>> -
> >>> - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
> >>> + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
> >>> }
> >>>
> >>> -static int
> >>> -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
> >>> - bool force)
> >>> -{
> >>> - struct irq_cfg *cfg = irqd_cfg(data);
> >>> - unsigned int dest;
> >>> - unsigned long mmr_value, mmr_offset;
> >>> - struct uv_IO_APIC_route_entry *entry;
> >>> - int mmr_pnode;
> >>> -
> >>> - if (apic_set_affinity(data, mask, &dest))
> >>> - return -1;
> >>> -
> >>> - mmr_value = 0;
> >>> - entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
> >>> -
> >>> - entry->vector = cfg->vector;
> >>> - entry->delivery_mode = apic->irq_delivery_mode;
> >>> - entry->dest_mode = apic->irq_dest_mode;
> >>> - entry->polarity = 0;
> >>> - entry->trigger = 0;
> >>> - entry->mask = 0;
> >>> - entry->dest = dest;
> >>> -
> >>> - /* Get previously stored MMR and pnode of hub sourcing interrupts */
> >>> - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
> >>> - return -1;
> >>> -
> >>> - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
> >>> +static struct irq_domain_ops uv_domain_ops = {
> >>> + .alloc = uv_domain_alloc,
> >>> + .free = uv_domain_free,
> >>> + .activate = uv_domain_activate,
> >>> + .deactivate = uv_domain_deactivate,
> >>> +};
> >>>
> >>> - if (cfg->move_in_progress)
> >>> - send_cleanup_vector(cfg);
> >>> +static struct irq_domain *uv_get_irq_domain(void)
> >>> +{
> >>> + static struct irq_domain *uv_domain;
> >>> + static DEFINE_MUTEX(uv_lock);
> >>> +
> >>> + mutex_lock(&uv_lock);
> >>> + if (uv_domain == NULL) {
> >>> + uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL);
> >>> + if (uv_domain)
> >>> + uv_domain->parent = x86_vector_domain;
> >>> + }
> >>> + mutex_unlock(&uv_lock);
> >>>
> >>> - return IRQ_SET_MASK_OK_NOCOPY;
> >>> + return uv_domain;
> >>> }
> >>>
> >>> /*
> >>> @@ -229,23 +181,20 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
> >>> int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
> >>> unsigned long mmr_offset, int limit)
> >>> {
> >>> - int ret, irq;
> >>> struct irq_alloc_info info;
> >>> + struct irq_domain *domain = uv_get_irq_domain();
> >>> +
> >>> + if (!domain)
> >>> + return -ENOMEM;
> >>>
> >>> init_irq_alloc_info(&info, cpumask_of(cpu));
> >>> - irq = irq_domain_alloc_irqs(NULL, 1, uv_blade_to_memory_nid(mmr_blade),
> >>> - &info);
> >>> - if (irq <= 0)
> >>> - return -EBUSY;
> >>> -
> >>> - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
> >>> - limit);
> >>> - if (ret == irq)
> >>> - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
> >>> - else
> >>> - irq_domain_free_irqs(irq, 1);
> >>> + info.uv_limit = limit;
> >>> + info.uv_blade = mmr_blade;
> >>> + info.uv_offset = mmr_offset;
> >>> + info.uv_name = irq_name;
> >>>
> >>> - return ret;
> >>> + return irq_domain_alloc_irqs(domain, 1,
> >>> + uv_blade_to_memory_nid(mmr_blade), &info);
> >>> }
> >>> EXPORT_SYMBOL_GPL(uv_setup_irq);
> >>>
> >>> @@ -258,26 +207,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
> >>> */
> >>> void uv_teardown_irq(unsigned int irq)
> >>> {
> >>> - struct uv_irq_2_mmr_pnode *e;
> >>> - struct rb_node *n;
> >>> - unsigned long irqflags;
> >>> -
> >>> - spin_lock_irqsave(&uv_irq_lock, irqflags);
> >>> - n = uv_irq_root.rb_node;
> >>> - while (n) {
> >>> - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
> >>> - if (e->irq == irq) {
> >>> - arch_disable_uv_irq(e->pnode, e->offset);
> >>> - rb_erase(n, &uv_irq_root);
> >>> - kfree(e);
> >>> - break;
> >>> - }
> >>> - if (irq < e->irq)
> >>> - n = n->rb_left;
> >>> - else
> >>> - n = n->rb_right;
> >>> - }
> >>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags);
> >>> irq_domain_free_irqs(irq, 1);
> >>> }
> >>> EXPORT_SYMBOL_GPL(uv_teardown_irq);
> >>> --
> >>> 1.7.10.4
> >>>
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> >>> the body of a message to majordomo@xxxxxxxxxxxxxxx
> >>> More majordomo info at http://vger.kernel.org/majordomo-info.html
> >>> Please read the FAQ at http://www.tux.org/lkml/
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/
> >

--
Russ Anderson, Kernel and Performance Software Team Manager
SGI - Silicon Graphics Inc rja@xxxxxxx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/