[PATCH v2] xen/events: Fix race in set_evtchn_to_irq

From: Maximilian Heyne
Date: Thu Aug 12 2021 - 09:10:25 EST

There is a TOCTOU issue in set_evtchn_to_irq. Rows in the evtchn_to_irq
mapping are lazily allocated in this function. The check whether the row
is already present and the row initialization is not synchronized. Two
threads can at the same time allocate a new row for evtchn_to_irq and
add the irq mapping to the their newly allocated row. One thread will
overwrite what the other has set for evtchn_to_irq[row] and therefore
the irq mapping is lost. This will trigger a BUG_ON later in

INFO: pci 0000:1a:15.4: [1d0f:8061] type 00 class 0x010802
INFO: nvme 0000:1a:12.1: enabling device (0000 -> 0002)
INFO: nvme nvme77: 1/0/0 default/read/poll queues
CRIT: kernel BUG at drivers/xen/events/events_base.c:427!
WARN: invalid opcode: 0000 [#1] SMP NOPTI
WARN: Workqueue: nvme-reset-wq nvme_reset_work [nvme]
WARN: RIP: e030:bind_evtchn_to_cpu+0xc2/0xd0
WARN: Call Trace:
WARN: set_affinity_irq+0x121/0x150
WARN: irq_do_set_affinity+0x37/0xe0
WARN: irq_setup_affinity+0xf6/0x170
WARN: irq_startup+0x64/0xe0
WARN: __setup_irq+0x69e/0x740
WARN: ? request_threaded_irq+0xad/0x160
WARN: request_threaded_irq+0xf5/0x160
WARN: ? nvme_timeout+0x2f0/0x2f0 [nvme]
WARN: pci_request_irq+0xa9/0xf0
WARN: ? pci_alloc_irq_vectors_affinity+0xbb/0x130
WARN: queue_request_irq+0x4c/0x70 [nvme]
WARN: nvme_reset_work+0x82d/0x1550 [nvme]
WARN: ? check_preempt_wakeup+0x14f/0x230
WARN: ? check_preempt_curr+0x29/0x80
WARN: ? nvme_irq_check+0x30/0x30 [nvme]
WARN: process_one_work+0x18e/0x3c0
WARN: worker_thread+0x30/0x3a0
WARN: ? process_one_work+0x3c0/0x3c0
WARN: kthread+0x113/0x130
WARN: ? kthread_park+0x90/0x90
WARN: ret_from_fork+0x3a/0x50

This patch sets evtchn_to_irq rows via a cmpxchg operation so that they
will be set only once. The row is now cleared before writing it to
evtchn_to_irq in order to not create a race once the row is visible for
other threads.

While at it, do not require the page to be zeroed, because it will be
overwritten with -1's in clear_evtchn_to_irq_row anyway.

Signed-off-by: Maximilian Heyne <mheyne@xxxxxxxxx>
Fixes: d0b075ffeede ("xen/events: Refactor evtchn_to_irq array to be dynamically allocated")
drivers/xen/events/events_base.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index d7e361fb0548..0e44098f3977 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -198,12 +198,12 @@ static void disable_dynirq(struct irq_data *data);

static DEFINE_PER_CPU(unsigned int, irq_epoch);

-static void clear_evtchn_to_irq_row(unsigned row)
+static void clear_evtchn_to_irq_row(int *evtchn_row)
unsigned col;

for (col = 0; col < EVTCHN_PER_ROW; col++)
- WRITE_ONCE(evtchn_to_irq[row][col], -1);
+ WRITE_ONCE(evtchn_row[col], -1);

static void clear_evtchn_to_irq_all(void)
@@ -213,7 +213,7 @@ static void clear_evtchn_to_irq_all(void)
for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
if (evtchn_to_irq[row] == NULL)
- clear_evtchn_to_irq_row(row);
+ clear_evtchn_to_irq_row(evtchn_to_irq[row]);

@@ -221,6 +221,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
unsigned row;
unsigned col;
+ int *evtchn_row;

if (evtchn >= xen_evtchn_max_channels())
return -EINVAL;
@@ -233,11 +234,18 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
if (irq == -1)
return 0;

- evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
- if (evtchn_to_irq[row] == NULL)
+ evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
+ if (evtchn_row == NULL)
return -ENOMEM;

- clear_evtchn_to_irq_row(row);
+ clear_evtchn_to_irq_row(evtchn_row);
+ /*
+ * We've prepared an empty row for the mapping. If a different
+ * thread was faster inserting it, we can drop ours.
+ */
+ if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
+ free_page((unsigned long) evtchn_row);

WRITE_ONCE(evtchn_to_irq[row][col], irq);

Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879