Re: [PATCH v4 08/20] irqchip/gic-v4.1: Plumb get/set_irqchip_state SGI callbacks

From: Zenghui Yu
Date: Wed Feb 19 2020 - 22:12:03 EST


Hi Marc,

On 2020/2/18 23:31, Marc Zyngier wrote:
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 7656b353a95f..0ed286dba827 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -144,7 +144,7 @@ struct event_lpi_map {
ÂÂÂÂ u16ÂÂÂÂÂÂÂÂÂÂÂ *col_map;
ÂÂÂÂ irq_hw_number_tÂÂÂÂÂÂÂ lpi_base;
ÂÂÂÂ intÂÂÂÂÂÂÂÂÂÂÂ nr_lpis;
-ÂÂÂ raw_spinlock_tÂÂÂÂÂÂÂ vlpi_lock;
+ÂÂÂ raw_spinlock_tÂÂÂÂÂÂÂ map_lock;

So we use map_lock to protect the mapping affinity of both the LPIs and
the VLPIs of a device, and use vpe_lock to protect the vPE's affinity. OK.

ÂÂÂÂ struct its_vmÂÂÂÂÂÂÂ *vm;
ÂÂÂÂ struct its_vlpi_mapÂÂÂ *vlpi_maps;
ÂÂÂÂ intÂÂÂÂÂÂÂÂÂÂÂ nr_vlpis;
@@ -240,15 +240,33 @@ static struct its_vlpi_map *get_vlpi_map(struct irq_data *d)
ÂÂÂÂ return NULL;
Â}

-static int irq_to_cpuid(struct irq_data *d)
+static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
Â{
-ÂÂÂ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
ÂÂÂÂ struct its_vlpi_map *map = get_vlpi_map(d);
+ÂÂÂ int cpu;

-ÂÂÂ if (map)
-ÂÂÂÂÂÂÂ return map->vpe->col_idx;
+ÂÂÂ if (map) {
+ÂÂÂÂÂÂÂ raw_spin_lock_irqsave(&map->vpe->vpe_lock, *flags);
+ÂÂÂÂÂÂÂ cpu = map->vpe->col_idx;
+ÂÂÂ } else {
+ÂÂÂÂÂÂÂ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ÂÂÂÂÂÂÂ raw_spin_lock_irqsave(&its_dev->event_map.map_lock, *flags);
+ÂÂÂÂÂÂÂ cpu = its_dev->event_map.col_map[its_get_event_id(d)];
+ÂÂÂ }

-ÂÂÂ return its_dev->event_map.col_map[its_get_event_id(d)];
+ÂÂÂ return cpu;
+}

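This helper is correct for normal LPIs and VLPIs, but wrong for the per-vPE
IRQ (doorbell) and for vSGIs. For both of them the irqchip data is a
struct its_vpe (see its_vpe_set_affinity() and its_sgi_get_irqchip_state()),
not a struct its_device, so treating the result of
irq_data_get_irq_chip_data() as an its_device here is incorrect.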

+
+static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
+{
+ÂÂÂ struct its_vlpi_map *map = get_vlpi_map(d);
+
+ÂÂÂ if (map) {
+ÂÂÂÂÂÂÂ raw_spin_unlock_irqrestore(&map->vpe->vpe_lock, flags);
+ÂÂÂ } else {
+ÂÂÂÂÂÂÂ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ÂÂÂÂÂÂÂ raw_spin_unlock_irqrestore(&its_dev->event_map.map_lock, flags);
+ÂÂÂ }
Â}

The same problem applies to this helper.


Âstatic struct its_collection *valid_col(struct its_collection *col)
@@ -1384,6 +1402,8 @@ static void direct_lpi_inv(struct irq_data *d)
Â{
ÂÂÂÂ struct its_vlpi_map *map = get_vlpi_map(d);
ÂÂÂÂ void __iomem *rdbase;
+ÂÂÂ unsigned long flags;
+ÂÂÂ int cpu;
ÂÂÂÂ u64 val;

ÂÂÂÂ if (map) {
@@ -1399,10 +1419,12 @@ static void direct_lpi_inv(struct irq_data *d)
ÂÂÂÂ }

ÂÂÂÂ /* Target the redistributor this LPI is currently routed to */
-ÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, irq_to_cpuid(d))->rd_base;
+ÂÂÂ cpu = irq_to_cpuid_lock(d, &flags);
+ÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
ÂÂÂÂ gic_write_lpir(val, rdbase + GICR_INVLPIR);

ÂÂÂÂ wait_for_syncr(rdbase);
+ÂÂÂ irq_to_cpuid_unlock(d, flags);
Â}

Âstatic void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
@@ -1471,11 +1493,11 @@ static void its_unmask_irq(struct irq_data *d)
Âstatic int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ bool force)
Â{
-ÂÂÂ unsigned int cpu;
ÂÂÂÂ const struct cpumask *cpu_mask = cpu_online_mask;
ÂÂÂÂ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
ÂÂÂÂ struct its_collection *target_col;
-ÂÂÂ u32 id = its_get_event_id(d);
+ÂÂÂ unsigned int from, cpu;
+ÂÂÂ unsigned long flags;

ÂÂÂÂ /* A forwarded interrupt should use irq_set_vcpu_affinity */
ÂÂÂÂ if (irqd_is_forwarded_to_vcpu(d))
@@ -1496,12 +1518,16 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
ÂÂÂÂÂÂÂÂ return -EINVAL;

ÂÂÂÂ /* don't set the affinity when the target cpu is same as current one */
-ÂÂÂ if (cpu != its_dev->event_map.col_map[id]) {
+ÂÂÂ from = irq_to_cpuid_lock(d, &flags);
+ÂÂÂ if (cpu != from) {
+ÂÂÂÂÂÂÂ u32 id = its_get_event_id(d);
+
ÂÂÂÂÂÂÂÂ target_col = &its_dev->its->collections[cpu];
ÂÂÂÂÂÂÂÂ its_send_movi(its_dev, target_col, id);
ÂÂÂÂÂÂÂÂ its_dev->event_map.col_map[id] = cpu;
ÂÂÂÂÂÂÂÂ irq_data_update_effective_affinity(d, cpumask_of(cpu));
ÂÂÂÂ }
+ÂÂÂ irq_to_cpuid_unlock(d, flags);

ÂÂÂÂ return IRQ_SET_MASK_OK_DONE;
Â}
@@ -1636,7 +1662,7 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info)
ÂÂÂÂ if (!info->map)
ÂÂÂÂÂÂÂÂ return -EINVAL;

-ÂÂÂ raw_spin_lock(&its_dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_lock(&its_dev->event_map.map_lock);

ÂÂÂÂ if (!its_dev->event_map.vm) {
ÂÂÂÂÂÂÂÂ struct its_vlpi_map *maps;
@@ -1685,7 +1711,7 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info)
ÂÂÂÂ }

Âout:
-ÂÂÂ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_unlock(&its_dev->event_map.map_lock);
ÂÂÂÂ return ret;
Â}

@@ -1695,7 +1721,7 @@ static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info)
ÂÂÂÂ struct its_vlpi_map *map;
ÂÂÂÂ int ret = 0;

-ÂÂÂ raw_spin_lock(&its_dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_lock(&its_dev->event_map.map_lock);

ÂÂÂÂ map = get_vlpi_map(d);

@@ -1708,7 +1734,7 @@ static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info)
ÂÂÂÂ *info->map = *map;

Âout:
-ÂÂÂ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_unlock(&its_dev->event_map.map_lock);
ÂÂÂÂ return ret;
Â}

@@ -1718,7 +1744,7 @@ static int its_vlpi_unmap(struct irq_data *d)
ÂÂÂÂ u32 event = its_get_event_id(d);
ÂÂÂÂ int ret = 0;

-ÂÂÂ raw_spin_lock(&its_dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_lock(&its_dev->event_map.map_lock);

ÂÂÂÂ if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
ÂÂÂÂÂÂÂÂ ret = -EINVAL;
@@ -1748,7 +1774,7 @@ static int its_vlpi_unmap(struct irq_data *d)
ÂÂÂÂ }

Âout:
-ÂÂÂ raw_spin_unlock(&its_dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_unlock(&its_dev->event_map.map_lock);
ÂÂÂÂ return ret;
Â}

@@ -3193,7 +3219,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
ÂÂÂÂ dev->event_map.col_map = col_map;
ÂÂÂÂ dev->event_map.lpi_base = lpi_base;
ÂÂÂÂ dev->event_map.nr_lpis = nr_lpis;
-ÂÂÂ raw_spin_lock_init(&dev->event_map.vlpi_lock);
+ÂÂÂ raw_spin_lock_init(&dev->event_map.map_lock);
ÂÂÂÂ dev->device_id = dev_id;
ÂÂÂÂ INIT_LIST_HEAD(&dev->entry);

@@ -3560,6 +3586,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
Â{
ÂÂÂÂ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
ÂÂÂÂ int from, cpu = cpumask_first(mask_val);
+ÂÂÂ unsigned long flags;

ÂÂÂÂ /*
ÂÂÂÂÂ * Changing affinity is mega expensive, so let's be as lazy as
@@ -3567,6 +3594,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
ÂÂÂÂÂ * into the proxy device, we need to move the doorbell
ÂÂÂÂÂ * interrupt to its new location.
ÂÂÂÂÂ */
+ÂÂÂ raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
ÂÂÂÂ if (vpe->col_idx == cpu)
ÂÂÂÂÂÂÂÂ goto out;

@@ -3586,6 +3614,7 @@ static int its_vpe_set_affinity(struct irq_data *d,

Âout:
ÂÂÂÂ irq_data_update_effective_affinity(d, cpumask_of(cpu));
+ÂÂÂ raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);

ÂÂÂÂ return IRQ_SET_MASK_OK_DONE;
Â}
@@ -3695,11 +3724,15 @@ static void its_vpe_send_inv(struct irq_data *d)

ÂÂÂÂ if (gic_rdists->has_direct_lpi) {
ÂÂÂÂÂÂÂÂ void __iomem *rdbase;
+ÂÂÂÂÂÂÂ unsigned long flags;
+ÂÂÂÂÂÂÂ int cpu;

ÂÂÂÂÂÂÂÂ /* Target the redistributor this VPE is currently known on */
-ÂÂÂÂÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+ÂÂÂÂÂÂÂ cpu = irq_to_cpuid_lock(d, &flags);
+ÂÂÂÂÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
ÂÂÂÂÂÂÂÂ gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
ÂÂÂÂÂÂÂÂ wait_for_syncr(rdbase);
+ÂÂÂÂÂÂÂ irq_to_cpuid_unlock(d, flags);
ÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂ its_vpe_send_cmd(vpe, its_send_inv);
ÂÂÂÂ }

Do we really need to grab the vpe_lock in the callbacks that belong to
the same irqchip as its_vpe_set_affinity()? The IRQ core code should
already ensure mutual exclusion among them, or am I wrong?

@@ -3735,14 +3768,18 @@ static int its_vpe_set_irqchip_state(struct irq_data *d,

ÂÂÂÂ if (gic_rdists->has_direct_lpi) {
ÂÂÂÂÂÂÂÂ void __iomem *rdbase;
+ÂÂÂÂÂÂÂ unsigned long flags;
+ÂÂÂÂÂÂÂ int cpu;

-ÂÂÂÂÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+ÂÂÂÂÂÂÂ cpu = irq_to_cpuid_lock(d, &flags);
+ÂÂÂÂÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
ÂÂÂÂÂÂÂÂ if (state) {
ÂÂÂÂÂÂÂÂÂÂÂÂ gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR);
ÂÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂÂ gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR);
ÂÂÂÂÂÂÂÂÂÂÂÂ wait_for_syncr(rdbase);
ÂÂÂÂÂÂÂÂ }
+ÂÂÂÂÂÂÂ irq_to_cpuid_unlock(d, flags);
ÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂ if (state)
ÂÂÂÂÂÂÂÂÂÂÂÂ its_vpe_send_cmd(vpe, its_send_int);
@@ -3854,14 +3891,17 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
Âstatic void its_vpe_4_1_invall(struct its_vpe *vpe)
Â{
ÂÂÂÂ void __iomem *rdbase;
+ÂÂÂ unsigned long flags;
ÂÂÂÂ u64 val;

 val = GICR_INVALLR_V;
ÂÂÂÂ val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id);

ÂÂÂÂ /* Target the redistributor this vPE is currently known on */
+ÂÂÂ raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
ÂÂÂÂ rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
ÂÂÂÂ gic_write_lpir(val, rdbase + GICR_INVALLR);
+ÂÂÂ raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
Â}

Âstatic int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
@@ -3960,13 +4000,17 @@ static int its_sgi_get_irqchip_state(struct irq_data *d,
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum irqchip_irq_state which, bool *val)
Â{
ÂÂÂÂ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
-ÂÂÂ void __iomem *base = gic_data_rdist_cpu(vpe->col_idx)->rd_base + SZ_128K;
+ÂÂÂ void __iomem *base;
+ÂÂÂ unsigned long flags;
ÂÂÂÂ u32 count = 1000000;ÂÂÂ /* 1s! */
ÂÂÂÂ u32 status;
+ÂÂÂ int cpu;

ÂÂÂÂ if (which != IRQCHIP_STATE_PENDING)
ÂÂÂÂÂÂÂÂ return -EINVAL;

+ÂÂÂ cpu = irq_to_cpuid_lock(d, &flags);
+ÂÂÂ base = gic_data_rdist_cpu(cpu)->rd_base + SZ_128K;
ÂÂÂÂ writel_relaxed(vpe->vpe_id, base + GICR_VSGIR);
ÂÂÂÂ do {
ÂÂÂÂÂÂÂÂ status = readl_relaxed(base + GICR_VSGIPENDR);
@@ -3983,6 +4027,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d,
ÂÂÂÂ } while(count);

Âout:
+ÂÂÂ irq_to_cpuid_unlock(d, flags);
ÂÂÂÂ *val = !!(status & (1 << d->hwirq));

ÂÂÂÂ return 0;
@@ -4102,6 +4147,7 @@ static int its_vpe_init(struct its_vpe *vpe)
ÂÂÂÂÂÂÂÂ return -ENOMEM;
ÂÂÂÂ }

+ÂÂÂ raw_spin_lock_init(&vpe->vpe_lock);
ÂÂÂÂ vpe->vpe_id = vpe_id;
ÂÂÂÂ vpe->vpt_page = vpt_page;
ÂÂÂÂ if (gic_rdists->has_rvpeid)
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 46c167a6349f..fc43a63875a3 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -60,6 +60,7 @@ struct its_vpe {
ÂÂÂÂÂÂÂÂ };
ÂÂÂÂ };

+ÂÂÂ raw_spinlock_tÂÂÂÂÂÂÂ vpe_lock;
ÂÂÂÂ /*
ÂÂÂÂÂ * This collection ID is used to indirect the target
ÂÂÂÂÂ * redistributor for this VPE. The ID itself isn't involved in

I'm not sure if it's good enough; it may get much clearer after
splitting.


Thanks,
Zenghui