[RFC 12/12] Drivers: hv: vmbus: Ensure IRQ affinity isn't set to a CPU going offline

From: mhkelley58
Date: Tue Jun 04 2024 - 01:14:17 EST


From: Michael Kelley <mhklinux@xxxxxxxxxxx>

hv_synic_cleanup() currently prevents a CPU from going offline if any
VMBus channel IRQs are targeted at that CPU. However, current code has a
race in that an IRQ could be affinitized to the CPU after the check in
hv_synic_cleanup() and before the CPU is removed from cpu_online_mask.
Any channel interrupts could be lost and the channel would hang.

Fix this by adding a flag for each CPU indicating if the synic is online.
Filter the new affinity with these flags so that vmbus_irq_set_affinity()
doesn't pick a CPU where the synic is already offline.

Also add a spin lock so that vmbus_irq_set_affinity() changing the
channel target_cpu and sending the MODIFYCHANNEL message to Hyper-V
are atomic with respect to the checks made in hv_synic_cleanup().
hv_synic_cleanup() needs these operations to be atomic so that it
can correctly count the MODIFYCHANNEL messages that need to be
ack'ed by Hyper-V.

Signed-off-by: Michael Kelley <mhklinux@xxxxxxxxxxx>
---
drivers/hv/connection.c | 1 +
drivers/hv/hv.c | 22 ++++++++++++++++++++--
drivers/hv/hyperv_vmbus.h | 2 ++
drivers/hv/vmbus_drv.c | 34 ++++++++++++++++++++++++++++------
4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index a105eecdeec2..b44ce3d39135 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -213,6 +213,7 @@ int vmbus_connect(void)

INIT_LIST_HEAD(&vmbus_connection.chn_list);
mutex_init(&vmbus_connection.channel_mutex);
+ spin_lock_init(&vmbus_connection.set_affinity_lock);

/*
* Setup the vmbus event connection for channel interrupt
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 76658dfc5008..89e491219129 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -338,6 +338,8 @@ int hv_synic_init(unsigned int cpu)
{
hv_synic_enable_regs(cpu);

+ cpumask_set_cpu(cpu, &vmbus_connection.synic_online);
+
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

return 0;
@@ -513,6 +515,17 @@ int hv_synic_cleanup(unsigned int cpu)
* TODO: Re-bind the channels to different CPUs.
*/
mutex_lock(&vmbus_connection.channel_mutex);
+ spin_lock(&vmbus_connection.set_affinity_lock);
+
+ /*
+ * Once the check for channels assigned to this CPU is complete, we
+ * must not allow a channel to be assigned to this CPU. So mark
+ * the synic as no longer online. This cpumask is checked in
+ * vmbus_irq_set_affinity() to prevent setting the affinity of
+ * an IRQ to such a CPU.
+ */
+ cpumask_clear_cpu(cpu, &vmbus_connection.synic_online);
+
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->target_cpu == cpu) {
channel_found = true;
@@ -527,10 +540,11 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found)
break;
}
+ spin_unlock(&vmbus_connection.set_affinity_lock);
mutex_unlock(&vmbus_connection.channel_mutex);

if (channel_found)
- return -EBUSY;
+ goto set_online;

/*
* channel_found == false means that any channels that were previously
@@ -547,7 +561,7 @@ int hv_synic_cleanup(unsigned int cpu)
if (hv_synic_event_pending()) {
pr_err("Events pending when trying to offline CPU %d\n",
cpu);
- return -EBUSY;
+ goto set_online;
}
}

@@ -557,4 +571,8 @@ int hv_synic_cleanup(unsigned int cpu)
hv_synic_disable_regs(cpu);

return 0;
+
+set_online:
+ cpumask_set_cpu(cpu, &vmbus_connection.synic_online);
+ return -EBUSY;
}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 571b2955b38e..92ae5af10778 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -263,6 +263,8 @@ struct vmbus_connection {
struct fwnode_handle *vmbus_fwnode;
struct irq_domain *vmbus_irq_domain;
struct irq_chip vmbus_irq_chip;
+ cpumask_t synic_online;
+ spinlock_t set_affinity_lock;

/*
* VM-wide counts of MODIFYCHANNEL messages sent and completed.
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 87f2f3436136..3430ad42d7ba 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1351,6 +1351,14 @@ int vmbus_irq_set_affinity(struct irq_data *data,
return -EINVAL;
}

+ /*
+ * The spin lock must be held so that checking synic_online, sending
+ * the MODIFYCHANNEL message, and setting channel->target_cpu are
+ * atomic with respect to hv_synic_cleanup() clearing the CPU in
+ * synic_online and doing the search.
+ */
+ spin_lock(&vmbus_connection.set_affinity_lock);
+
/* Don't consider CPUs that are isolated */
if (housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
cpumask_and(&tempmask, dest,
@@ -1367,30 +1375,39 @@ int vmbus_irq_set_affinity(struct irq_data *data,
origin_cpu = channel->target_cpu;
if (cpumask_test_cpu(origin_cpu, &tempmask)) {
target_cpu = origin_cpu;
+ spin_unlock(&vmbus_connection.set_affinity_lock);
goto update_effective;
}

/*
* Pick a CPU from the new affinity mask. As a simple heuristic to
* spread out the selection when the mask contains multiple CPUs,
- * start with whatever CPU was last selected.
+ * start with whatever CPU was last selected. Also filter out any
+ * CPUs where synic_online isn't set -- these CPUs are in the process
+ * of going offline and must not have channel interrupts assigned
+ * to them.
*/
+ cpumask_and(&tempmask, &tempmask, &vmbus_connection.synic_online);
target_cpu = cpumask_next_wrap(next_cpu, &tempmask, nr_cpu_ids, false);
- if (target_cpu >= nr_cpu_ids)
- return -EINVAL;
+ if (target_cpu >= nr_cpu_ids) {
+ ret = -EINVAL;
+ goto unlock;
+ }
next_cpu = target_cpu;

/*
* Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
* avoid sending the message and fail here for such channels.
*/
- if (channel->state != CHANNEL_OPENED_STATE)
- return -EIO;
+ if (channel->state != CHANNEL_OPENED_STATE) {
+ ret = -EIO;
+ goto unlock;
+ }

ret = vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu));
if (ret)
- return ret;
+ goto unlock;

/*
* Warning. At this point, there is *no* guarantee that the host will
@@ -1408,6 +1425,7 @@ int vmbus_irq_set_affinity(struct irq_data *data,
*/

channel->target_cpu = target_cpu;
+ spin_unlock(&vmbus_connection.set_affinity_lock);

/* See init_vp_index(). */
if (hv_is_perf_channel(channel))
@@ -1422,6 +1440,10 @@ int vmbus_irq_set_affinity(struct irq_data *data,
update_effective:
irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
return IRQ_SET_MASK_OK;
+
+unlock:
+ spin_unlock(&vmbus_connection.set_affinity_lock);
+ return ret;
}

/*
--
2.25.1