[tip:irq/core] genirq/cpuhotplug: Handle managed IRQs on CPU hotplug
From: tip-bot for Thomas Gleixner
Date: Thu Jun 22 2017 - 13:13:24 EST
Commit-ID: c5cb83bb337c25caae995d992d1cdf9b317f83de
Gitweb: http://git.kernel.org/tip/c5cb83bb337c25caae995d992d1cdf9b317f83de
Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
AuthorDate: Tue, 20 Jun 2017 01:37:51 +0200
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Thu, 22 Jun 2017 18:21:25 +0200
genirq/cpuhotplug: Handle managed IRQs on CPU hotplug
If a CPU goes offline, interrupts affine to the CPU are moved away. If the
outgoing CPU is the last CPU in the affinity mask the migration code breaks
the affinity and sets it it all online cpus.
This is a problem for affinity managed interrupts as CPU hotplug is often
used for power management purposes. If the affinity is broken, the
interrupt is not longer affine to the CPUs to which it was allocated.
The affinity spreading allows to lay out multi queue devices in a way that
they are assigned to a single CPU or a group of CPUs. If the last CPU goes
offline, then the queue is not longer used, so the interrupt can be
shutdown gracefully and parked until one of the assigned CPUs comes online
again.
Add a graceful shutdown mechanism into the irq affinity breaking code path,
mark the irq as MANAGED_SHUTDOWN and leave the affinity mask unmodified.
In the online path, scan the active interrupts for managed interrupts and
if the interrupt is functional and the newly online CPU is part of the
affinity mask, restart the interrupt if it is marked MANAGED_SHUTDOWN or if
the interrupts is started up, try to add the CPU back to the effective
affinity mask.
Originally-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Keith Busch <keith.busch@xxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/20170619235447.273417334@xxxxxxxxxxxxx
---
include/linux/cpuhotplug.h | 1 +
include/linux/irq.h | 5 +++++
kernel/cpu.c | 5 +++++
kernel/irq/cpuhotplug.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 56 insertions(+)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f2a803..c15f22c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -124,6 +124,7 @@ enum cpuhp_state {
CPUHP_AP_ONLINE_IDLE,
CPUHP_AP_SMPBOOT_THREADS,
CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ CPUHP_AP_IRQ_AFFINITY_ONLINE,
CPUHP_AP_PERF_ONLINE,
CPUHP_AP_PERF_X86_ONLINE,
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 807042b..19cea63 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data,
const struct cpumask *cpumask, bool force);
extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION)
extern void irq_migrate_all_off_this_cpu(void);
+extern int irq_affinity_online_cpu(unsigned int cpu);
+#else
+# define irq_affinity_online_cpu NULL
+#endif
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
void irq_move_irq(struct irq_data *data);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cb51034..b86b32e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.startup.single = smpboot_unpark_threads,
.teardown.single = NULL,
},
+ [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
+ .name = "irq/affinity:online",
+ .startup.single = irq_affinity_online_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_AP_PERF_ONLINE] = {
.name = "perf:online",
.startup.single = perf_event_init_cpu,
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 0b093db..b7964e7 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -83,6 +83,15 @@ static bool migrate_one_irq(struct irq_desc *desc)
chip->irq_mask(d);
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ /*
+ * If the interrupt is managed, then shut it down and leave
+ * the affinity untouched.
+ */
+ if (irqd_affinity_is_managed(d)) {
+ irqd_set_managed_shutdown(d);
+ irq_shutdown(desc);
+ return false;
+ }
affinity = cpu_online_mask;
brokeaff = true;
}
@@ -129,3 +138,39 @@ void irq_migrate_all_off_this_cpu(void)
}
}
}
+
+static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
+{
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+ const struct cpumask *affinity = irq_data_get_affinity_mask(data);
+
+ if (!irqd_affinity_is_managed(data) || !desc->action ||
+ !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
+ return;
+
+ if (irqd_is_managed_and_shutdown(data))
+ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ else
+ irq_set_affinity_locked(data, affinity, false);
+}
+
+/**
+ * irq_affinity_online_cpu - Restore affinity for managed interrupts
+ * @cpu: Upcoming CPU for which interrupts should be restored
+ */
+int irq_affinity_online_cpu(unsigned int cpu)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+
+ irq_lock_sparse();
+ for_each_active_irq(irq) {
+ desc = irq_to_desc(irq);
+ raw_spin_lock_irq(&desc->lock);
+ irq_restore_affinity_of_irq(desc, cpu);
+ raw_spin_unlock_irq(&desc->lock);
+ }
+ irq_unlock_sparse();
+
+ return 0;
+}