[patch V5 35/39] x86/microcode: Protect against instrumentation

From: Thomas Gleixner
Date: Tue Oct 17 2023 - 17:26:17 EST


From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

The wait for control loop in which the siblings are waiting for the
microcode update on the primary thread must be protected against
instrumentation as instrumentation can end up in #INT3, #DB or #PF,
which then returns with IRET. That IRET reenables NMI which is the
opposite of what the NMI rendezvous is trying to achieve.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/microcode/core.c | 111 ++++++++++++++++++++++++++---------
1 file changed, 83 insertions(+), 28 deletions(-)
---
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -272,54 +272,65 @@ struct microcode_ctrl {

DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
+static unsigned int loops_per_usec;
static atomic_t late_cpus_in;

-static bool wait_for_cpus(atomic_t *cnt)
+static noinstr bool wait_for_cpus(atomic_t *cnt)
{
- unsigned int timeout;
+ unsigned int timeout, loops;

- WARN_ON_ONCE(atomic_dec_return(cnt) < 0);
+ WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);

for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
- if (!atomic_read(cnt))
+ if (!raw_atomic_read(cnt))
return true;

- udelay(1);
+ for (loops = 0; loops < loops_per_usec; loops++)
+ cpu_relax();

/* If invoked directly, tickle the NMI watchdog */
- if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC))
+ if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
+ instrumentation_begin();
touch_nmi_watchdog();
+ instrumentation_end();
+ }
}
/* Prevent the late comers from making progress and let them time out */
- atomic_inc(cnt);
+ raw_atomic_inc(cnt);
return false;
}

-static bool wait_for_ctrl(void)
+static noinstr bool wait_for_ctrl(void)
{
- unsigned int timeout;
+ unsigned int timeout, loops;

for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
- if (this_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
+ if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
return true;
- udelay(1);
+
+ for (loops = 0; loops < loops_per_usec; loops++)
+ cpu_relax();
+
/* If invoked directly, tickle the NMI watchdog */
- if (!microcode_ops->use_nmi && !(timeout % 1000))
+ if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
+ instrumentation_begin();
touch_nmi_watchdog();
+ instrumentation_end();
+ }
}
return false;
}

-static void load_secondary(unsigned int cpu)
+/*
+ * Protected against instrumentation up to the point where the primary
+ * thread completed the update. See microcode_nmi_handler() for details.
+ */
+static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
{
- unsigned int ctrl_cpu = this_cpu_read(ucode_ctrl.ctrl_cpu);
- enum ucode_state ret;
-
/* Initial rendezvous to ensure that all CPUs have arrived */
if (!wait_for_cpus(&late_cpus_in)) {
- pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
- this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
- return;
+ raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
+ return false;
}

/*
@@ -329,9 +340,33 @@ static void load_secondary(unsigned int
* scheduler, watchdogs etc. There is no way to safely evacuate the
* machine.
*/
- if (!wait_for_ctrl())
- panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
+ if (wait_for_ctrl())
+ return true;
+
+ instrumentation_begin();
+ panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
+ instrumentation_end();
+}

+/*
+ * Protected against instrumentation up to the point where the primary
+ * thread completed the update. See microcode_nmi_handler() for details.
+ */
+static noinstr void load_secondary(unsigned int cpu)
+{
+ unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
+ enum ucode_state ret;
+
+ if (!load_secondary_wait(ctrl_cpu)) {
+ instrumentation_begin();
+ pr_err_once("load: %d CPUs timed out\n",
+ atomic_read(&late_cpus_in) - 1);
+ instrumentation_end();
+ return;
+ }
+
+ /* Primary thread completed. Allow to invoke instrumentable code */
+ instrumentation_begin();
/*
* If the primary succeeded then invoke the apply() callback,
* otherwise copy the state from the primary thread.
@@ -343,6 +378,7 @@ static void load_secondary(unsigned int

this_cpu_write(ucode_ctrl.result, ret);
this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
+ instrumentation_end();
}

static void load_primary(unsigned int cpu)
@@ -380,25 +416,43 @@ static void load_primary(unsigned int cp
}
}

-static bool microcode_update_handler(void)
+static noinstr bool microcode_update_handler(void)
{
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu = raw_smp_processor_id();

- if (this_cpu_read(ucode_ctrl.ctrl_cpu) == cpu)
+ if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
+ instrumentation_begin();
load_primary(cpu);
- else
+ instrumentation_end();
+ } else {
load_secondary(cpu);
+ }

+ instrumentation_begin();
touch_nmi_watchdog();
+ instrumentation_end();
+
return true;
}

-bool microcode_nmi_handler(void)
+/*
+ * Protection against instrumentation is required for CPUs which are not
+ * safe against an NMI which is delivered to the secondary SMT sibling
+ * while the primary thread updates the microcode. Instrumentation can end
+ * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
+ * which is the opposite of what the NMI rendezvous is trying to achieve.
+ *
+ * The primary thread is safe versus instrumentation as the actual
+ * microcode update handles this correctly. It's only the sibling code
+ * path which must be NMI safe until the primary thread completed the
+ * update.
+ */
+bool noinstr microcode_nmi_handler(void)
{
- if (!this_cpu_read(ucode_ctrl.nmi_enabled))
+ if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
return false;

- this_cpu_write(ucode_ctrl.nmi_enabled, false);
+ raw_cpu_write(ucode_ctrl.nmi_enabled, false);
return microcode_update_handler();
}

@@ -425,6 +479,7 @@ static int load_late_stop_cpus(void)
pr_err("You should switch to early loading, if possible.\n");

atomic_set(&late_cpus_in, num_online_cpus());
+ loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);

/*
* Take a snapshot before the microcode update in order to compare and