[PATCH] x86: limit irq affinity

From: Dimitri Sivanich
Date: Sun Sep 27 2009 - 21:03:28 EST

Next message: Dave Chinner: "Re: regression in page writeback"
Previous message: Mike Frysinger: "Re: [PATCH] const: constify remaining file_operations"
Next in thread: Yinghai Lu: "Re: [PATCH] x86: limit irq affinity"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This patch allows for restrictions to irq affinity via a new cpumask and
device node value in the irq_cfg structure. The node value can then be
used by specific x86 architectures to determine the cpumask for the
desired cpu irq affinity domain.

The mask forces IRQ affinity to remain within the specified cpu domain.
On some UV systems, this domain will be limited to the nodes accessible
to the given node. Currently other X86 systems will have all bits in
the cpumask set, so non-UV systems will remain unaffected at this time.

Signed-off-by: Dimitri Sivanich <sivanich@xxxxxxx>

---

arch/x86/Kconfig | 1
arch/x86/include/asm/uv/uv_irq.h | 2
arch/x86/include/asm/uv/uv_mmrs.h | 25 ++++
arch/x86/kernel/apic/io_apic.c | 166 +++++++++++++++++++++++++++------
arch/x86/kernel/apic/x2apic_uv_x.c | 2
arch/x86/kernel/uv_irq.c | 68 +++++++++++++
6 files changed, 235 insertions(+), 29 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-09-26 15:28:04.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c 2009-09-26 16:20:04.000000000 -0500
@@ -62,6 +62,7 @@
#include <asm/hw_irq.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_irq.h>
+#include <asm/uv/uv.h>

#include <asm/apic.h>

@@ -149,6 +150,8 @@ struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
+ cpumask_var_t allowed;
+ int node;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
@@ -184,6 +187,18 @@ void __init io_apic_disable_legacy(void)
nr_irqs_gsi = 0;
}

+/*
+ * Setup IRQ affinity restriction.
+ */
+static void set_irq_cfg_cpus_allowed(struct irq_cfg *irq_cfg)
+{
+ if (is_uv_system())
+ uv_set_irq_cfg_cpus_allowed(irq_cfg->allowed, irq_cfg->node);
+ else
+ /* Default to allow anything */
+ cpumask_setall(irq_cfg->allowed);
+}
+
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
@@ -199,8 +214,11 @@ int __init arch_early_irq_init(void)
for (i = 0; i < count; i++) {
desc = irq_to_desc(i);
desc->chip_data = &cfg[i];
+ cfg->node = node;
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+ zalloc_cpumask_var_node(&cfg[i].allowed, GFP_NOWAIT, node);
+ set_irq_cfg_cpus_allowed(&cfg[i]);
if (i < nr_legacy_irqs)
cpumask_setall(cfg[i].domain);
}
@@ -229,12 +247,19 @@ static struct irq_cfg *get_one_free_irq_
if (cfg) {
if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
- cfg = NULL;
- } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
+ return NULL;
+ }
+ if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
- cfg = NULL;
+ return NULL;
+ }
+ if (!zalloc_cpumask_var_node(&cfg->allowed, GFP_ATOMIC, node)) {
+ free_cpumask_var(cfg->old_domain);
+ free_cpumask_var(cfg->domain);
+ kfree(cfg);
+ return NULL;
}
}

@@ -247,12 +272,14 @@ int arch_init_chip_data(struct irq_desc

cfg = desc->chip_data;
if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(node);
+ cfg = desc->chip_data = get_one_free_irq_cfg(node);
if (!desc->chip_data) {
printk(KERN_ERR "can not alloc irq_cfg\n");
BUG_ON(1);
}
}
+ cfg->node = node;
+ set_irq_cfg_cpus_allowed(cfg);

return 0;
}
@@ -334,6 +361,10 @@ void arch_init_copy_chip_data(struct irq

memcpy(cfg, old_cfg, sizeof(struct irq_cfg));

+ cfg->node = node;
+
+ set_irq_cfg_cpus_allowed(cfg);
+
init_copy_irq_2_pin(old_cfg, cfg, node);
}

@@ -1445,16 +1476,29 @@ static void setup_IO_APIC_irq(int apic_i
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
+ cpumask_var_t tmp_mask;

if (!IO_APIC_IRQ(irq))
return;

cfg = desc->chip_data;

- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return;
+
+ if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+ free_cpumask_var(tmp_mask);
+ return;
+ }
+
+ if (assign_irq_vector(irq, cfg, tmp_mask)) {
+ free_cpumask_var(tmp_mask);
return;
+ }
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ free_cpumask_var(tmp_mask);

apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -2285,18 +2329,32 @@ set_desc_affinity(struct irq_desc *desc,
{
struct irq_cfg *cfg;
unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ cpumask_var_t tmp_mask;

irq = desc->irq;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return BAD_APICID;

- cpumask_copy(desc->affinity, mask);
+ if (!cpumask_and(tmp_mask, mask, cfg->allowed))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ cpumask_copy(desc->affinity, tmp_mask);
+
+ free_cpumask_var(tmp_mask);

return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+ free_cpumask_var(tmp_mask);
+ return BAD_APICID;
}

static int
@@ -2352,22 +2410,32 @@ migrate_ioapic_irq_desc(struct irq_desc
{
struct irq_cfg *cfg;
struct irte irte;
+ cpumask_var_t tmp_mask;
unsigned int dest;
unsigned int irq;
int ret = -1;

- if (!cpumask_intersects(mask, cpu_online_mask))
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return ret;

- irq = desc->irq;
+ if (!cpumask_and(tmp_mask, mask, cfg->allowed))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
if (get_irte(irq, &irte))
- return ret;
+ goto error;

- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ ret = 0;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -2380,9 +2448,10 @@ migrate_ioapic_irq_desc(struct irq_desc
if (cfg->move_in_progress)
send_cleanup_vector(cfg);

- cpumask_copy(desc->affinity, mask);
-
- return 0;
+ cpumask_copy(desc->affinity, tmp_mask);
+error:
+ free_cpumask_var(tmp_mask);
+ return ret;
}

/*
@@ -3166,6 +3235,8 @@ unsigned int create_irq_nr(unsigned int

if (irq > 0) {
dynamic_irq_init(irq);
+ cfg_new->node = node;
+ set_irq_cfg_cpus_allowed(cfg_new);
/* restore it, in case dynamic_irq_init clear it */
if (desc_new)
desc_new->chip_data = cfg_new;
@@ -3217,16 +3288,29 @@ static int msi_compose_msg(struct pci_de
struct irq_cfg *cfg;
int err;
unsigned dest;
+ cpumask_var_t tmp_mask;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+ free_cpumask_var(tmp_mask);
+ return -ENOSPC;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
+ if (err) {
+ free_cpumask_var(tmp_mask);
return err;
+ }
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ free_cpumask_var(tmp_mask);

if (irq_remapped(irq)) {
struct irte irte;
@@ -3701,19 +3785,27 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
+ cpumask_var_t tmp_mask;
int err;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+ if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+ free_cpumask_var(tmp_mask);
+ return -ENOSPC;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);

@@ -3737,12 +3829,30 @@ int arch_setup_ht_irq(unsigned int irq,

dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
}
+ free_cpumask_var(tmp_mask);
return err;
}
#endif /* CONFIG_HT_IRQ */

#ifdef CONFIG_X86_UV
/*
+ * Setup IRQ affinity restriction for IRQ's setup prior to the availability
+ * of UV topology information.
+ */
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+ struct irq_cfg *cfg;
+ int i;
+
+ /* Set allowed mask now that topology information is known */
+ for (i = 0; i < NR_IRQS; i++) {
+ cfg = irq_cfg(i);
+ if (cfg)
+ set_irq_cfg_cpus_allowed(cfg);
+ }
+}
+
+/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
@@ -3808,7 +3918,7 @@ void arch_disable_uv_irq(int mmr_blade,
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}
-#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_UV */

int __init io_apic_get_redir_entries (int ioapic)
{
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h 2009-09-26 15:28:04.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h 2009-09-26 15:28:07.000000000 -0500
@@ -27,9 +27,11 @@ struct uv_IO_APIC_route_entry {

extern struct irq_chip uv_irq_chip;

+extern void arch_init_uv_cfg_cpus_allowed(void);
extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
extern void arch_disable_uv_irq(int, unsigned long);

+extern void uv_set_irq_cfg_cpus_allowed(struct cpumask *, int);
extern int uv_setup_irq(char *, int, int, unsigned long);
extern void uv_teardown_irq(unsigned int, int, unsigned long);

Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h 2009-09-26 15:28:04.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h 2009-09-26 15:28:07.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
};

/* ========================================================================= */
+/* UVH_LB_SOCKET_DESTINATION_TABLE */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+ unsigned long v;
+ struct uvh_lb_socket_destination_table_s {
+ unsigned long rsvd_0 : 1; /* */
+ unsigned long node_id : 14; /* RW */
+ unsigned long chip_id : 1; /* RW */
+ unsigned long parity : 1; /* RW */
+ unsigned long rsvd_17_63: 47; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_LOCAL_INT0_CONFIG */
/* ========================================================================= */
#define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig 2009-09-26 15:28:04.000000000 -0500
+++ linux/arch/x86/Kconfig 2009-09-26 16:09:59.000000000 -0500
@@ -368,6 +368,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on X86_X2APIC
+ depends on NUMA_IRQ_DESC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c 2009-09-26 15:28:04.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c 2009-09-26 16:22:58.000000000 -0500
@@ -11,8 +11,9 @@
#include <linux/module.h>
#include <linux/irq.h>

-#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/apic.h>

static void uv_noop(unsigned int irq)
{
@@ -42,6 +43,71 @@ struct irq_chip uv_irq_chip = {
};

/*
+ * Setup the cpumask for IRQ restriction for a given UV node.
+ */
+void uv_set_irq_cfg_cpus_allowed(struct cpumask *mask, int node)
+{
+#ifdef CONFIG_SPARSE_IRQ
+ unsigned long pnode_tbl[UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH];
+ unsigned long *pa, *pa_end;
+ int cpu, i;
+
+ if (!uv_num_possible_blades()) {
+ /* We do not have enough topology information yet */
+ cpumask_setall(mask);
+ return;
+ }
+
+ /* Assume nodes accessible from node 0 */
+ if (node < 0)
+ node = 0;
+
+ pa = uv_global_mmr64_address(uv_node_to_pnode(node),
+ UVH_LB_SOCKET_DESTINATION_TABLE);
+
+ for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++, i++)
+ pnode_tbl[i] = UV_NASID_TO_PNODE(*pa &
+ UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+ cpumask_clear(mask);
+
+ pa = pnode_tbl;
+ pa_end = pa + UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH;
+
+ /* Select the cpus on nodes accessible from our hub */
+ for_each_possible_cpu(cpu) {
+ int p = uv_cpu_to_pnode(cpu);
+
+ if (p < *pa) {
+ while (p < *pa) {
+ pa--;
+ if (pa < pnode_tbl) {
+ pa++;
+ break;
+ }
+ }
+ if (*pa == p)
+ cpumask_set_cpu(cpu, mask);
+ continue;
+ }
+
+ while (*pa < p) {
+ pa++;
+ if (pa == pa_end) {
+ pa--;
+ break;
+ }
+ }
+
+ if (*pa == p)
+ cpumask_set_cpu(cpu, mask);
+ }
+#else
+ cpumask_setall(mask);
+#endif
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2009-09-26 15:28:04.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c 2009-09-26 16:10:02.000000000 -0500
@@ -23,6 +23,7 @@

#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/uv/bios.h>
@@ -659,5 +660,6 @@ void __init uv_system_init(void)

uv_cpu_init();
uv_scir_register_cpu_notifier();
+ arch_init_uv_cfg_cpus_allowed();
proc_mkdir("sgi_uv", NULL);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Dave Chinner: "Re: regression in page writeback"
Previous message: Mike Frysinger: "Re: [PATCH] const: constify remaining file_operations"
Next in thread: Yinghai Lu: "Re: [PATCH] x86: limit irq affinity"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]