[GIT pull] x86 fixes for 4.4

From: Thomas Gleixner
Date: Sun Dec 13 2015 - 04:24:34 EST


Linus,

please pull the latest x86-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

A series of small fixes to plug a race condition in the x86 vector
management code related to irq affinity settings.

Thanks,

tglx

------------------>
Jiang Liu (4):
x86/irq: Do not reuse struct apic_chip_data.old_domain as temporary buffer
x86/irq: Enhance __assign_irq_vector() to rollback in case of failure
x86/irq: Fix a race window in x86_vector_free_irqs()
x86/irq: Fix a race condition between vector assigning and cleanup


arch/x86/kernel/apic/vector.c | 117 +++++++++++++++++++-----------------------
1 file changed, 53 insertions(+), 64 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 861bc59c8f25..b63d6f84c0bb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -30,7 +30,7 @@ struct apic_chip_data {

struct irq_domain *x86_vector_domain;
static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, used_cpumask;
static struct irq_chip lapic_controller;
#ifdef CONFIG_X86_IO_APIC
static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -117,13 +117,15 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
static int current_offset = VECTOR_OFFSET_START % 16;
int cpu, err;
+ unsigned int dest;

- if (d->move_in_progress)
+ if (cpumask_intersects(d->old_domain, cpu_online_mask))
return -EBUSY;

/* Only try and allocate irqs on cpus that are present */
err = -ENOSPC;
cpumask_clear(d->old_domain);
+ cpumask_clear(used_cpumask);
cpu = cpumask_first_and(mask, cpu_online_mask);
while (cpu < nr_cpu_ids) {
int new_cpu, vector, offset;
@@ -139,11 +141,15 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
* the current in use mask. So cleanup the vector
* allocation for the members that are not used anymore.
*/
- cpumask_andnot(d->old_domain, d->domain,
- vector_cpumask);
- d->move_in_progress =
- cpumask_intersects(d->old_domain, cpu_online_mask);
- cpumask_and(d->domain, d->domain, vector_cpumask);
+ cpumask_and(used_cpumask, d->domain, vector_cpumask);
+ err = apic->cpu_mask_to_apicid_and(mask, used_cpumask,
+ &dest);
+ if (!err) {
+ cpumask_andnot(d->old_domain, d->domain,
+ vector_cpumask);
+ cpumask_copy(d->domain, used_cpumask);
+ d->cfg.dest_apicid = dest;
+ }
break;
}

@@ -157,9 +163,8 @@ next:
}

if (unlikely(current_vector == vector)) {
- cpumask_or(d->old_domain, d->old_domain,
- vector_cpumask);
- cpumask_andnot(vector_cpumask, mask, d->old_domain);
+ cpumask_or(used_cpumask, used_cpumask, vector_cpumask);
+ cpumask_andnot(vector_cpumask, mask, used_cpumask);
cpu = cpumask_first_and(vector_cpumask,
cpu_online_mask);
continue;
@@ -167,22 +172,22 @@ next:

if (test_bit(vector, used_vectors))
goto next;
-
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
goto next;
}
+ if (apic->cpu_mask_to_apicid_and(mask, vector_cpumask, &dest))
+ goto next;
+
/* Found one! */
current_vector = vector;
current_offset = offset;
- if (d->cfg.vector) {
+ if (d->cfg.vector)
cpumask_copy(d->old_domain, d->domain);
- d->move_in_progress =
- cpumask_intersects(d->old_domain, cpu_online_mask);
- }
+ d->cfg.vector = vector;
+ d->cfg.dest_apicid = dest;
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
- d->cfg.vector = vector;
cpumask_copy(d->domain, vector_cpumask);
err = 0;
break;
@@ -190,8 +195,8 @@ next:

if (!err) {
/* cache destination APIC IDs into cfg->dest_apicid */
- err = apic->cpu_mask_to_apicid_and(mask, d->domain,
- &d->cfg.dest_apicid);
+ cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+ d->move_in_progress = !cpumask_empty(d->old_domain);
}

return err;
@@ -223,26 +228,15 @@ static int assign_irq_vector_policy(int irq, int node,

static void clear_irq_vector(int irq, struct apic_chip_data *data)
{
- struct irq_desc *desc;
- unsigned long flags;
- int cpu, vector;
+ struct irq_desc *desc = irq_to_desc(irq);
+ int cpu, vector = data->cfg.vector;

- raw_spin_lock_irqsave(&vector_lock, flags);
- BUG_ON(!data->cfg.vector);
-
- vector = data->cfg.vector;
+ BUG_ON(!vector);
for_each_cpu_and(cpu, data->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-
data->cfg.vector = 0;
cpumask_clear(data->domain);

- if (likely(!data->move_in_progress)) {
- raw_spin_unlock_irqrestore(&vector_lock, flags);
- return;
- }
-
- desc = irq_to_desc(irq);
for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
@@ -253,7 +247,7 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
}
}
data->move_in_progress = 0;
- raw_spin_unlock_irqrestore(&vector_lock, flags);
+ cpumask_clear(data->old_domain);
}

void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -275,18 +269,21 @@ static void x86_vector_free_irqs(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
struct irq_data *irq_data;
+ unsigned long flags;
int i;

for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
if (irq_data && irq_data->chip_data) {
+ raw_spin_lock_irqsave(&vector_lock, flags);
clear_irq_vector(virq + i, irq_data->chip_data);
free_apic_chip_data(irq_data->chip_data);
+ irq_domain_reset_irq_data(irq_data);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
#ifdef CONFIG_X86_IO_APIC
if (virq + i < nr_legacy_irqs())
legacy_irq_data[virq + i] = NULL;
#endif
- irq_domain_reset_irq_data(irq_data);
}
}
}
@@ -404,6 +401,7 @@ int __init arch_early_irq_init(void)
arch_init_htirq_domain(x86_vector_domain);

BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&used_cpumask, GFP_KERNEL));

return arch_early_ioapic_init();
}
@@ -420,10 +418,13 @@ static void __setup_vector_irq(int cpu)
struct irq_data *idata = irq_desc_get_irq_data(desc);

data = apic_chip_data(idata);
- if (!data || !cpumask_test_cpu(cpu, data->domain))
- continue;
- vector = data->cfg.vector;
- per_cpu(vector_irq, cpu)[vector] = desc;
+ if (data) {
+ cpumask_clear_cpu(cpu, data->old_domain);
+ if (cpumask_test_cpu(cpu, data->domain)) {
+ vector = data->cfg.vector;
+ per_cpu(vector_irq, cpu)[vector] = desc;
+ }
+ }
}
/* Mark the free vectors */
for (vector = 0; vector < NR_VECTORS; ++vector) {
@@ -492,14 +493,8 @@ static int apic_set_affinity(struct irq_data *irq_data,
return -EINVAL;

err = assign_irq_vector(irq, data, dest);
- if (err) {
- if (assign_irq_vector(irq, data,
- irq_data_get_affinity_mask(irq_data)))
- pr_err("Failed to recover vector for irq %d\n", irq);
- return err;
- }

- return IRQ_SET_MASK_OK;
+ return err ? err : IRQ_SET_MASK_OK;
}

static struct irq_chip lapic_controller = {
@@ -511,20 +506,17 @@ static struct irq_chip lapic_controller = {
#ifdef CONFIG_SMP
static void __send_cleanup_vector(struct apic_chip_data *data)
{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
+ unsigned long flags;

- for_each_cpu_and(i, data->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i),
- IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ if (!data->move_in_progress)
+ goto out_unlock;
data->move_in_progress = 0;
+ cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+ if (!cpumask_empty(data->old_domain))
+ apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+out_unlock:
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
}

void send_cleanup_vector(struct irq_cfg *cfg)
@@ -568,14 +560,10 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
goto unlock;

/*
- * Check if the irq migration is in progress. If so, we
- * haven't received the cleanup request yet for this irq.
+ * Nothing to cleanup if this cpu is not set
+ * in the old_domain mask.
*/
- if (data->move_in_progress)
- goto unlock;
-
- if (vector == data->cfg.vector &&
- cpumask_test_cpu(me, data->domain))
+ if (!cpumask_test_cpu(me, data->old_domain))
goto unlock;

irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -591,6 +579,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
goto unlock;
}
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+ cpumask_clear_cpu(me, data->old_domain);
unlock:
raw_spin_unlock(&desc->lock);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/