Re: 4.15.17 regression: bisected: timeout during microcode update

From: Borislav Petkov
Date: Thu Apr 19 2018 - 12:37:42 EST


On Thu, Apr 19, 2018 at 03:46:27PM +0200, Vitezslav Samel wrote:
> ------------------------------------------------------------
> microcode: __reload_late: CPU0
> microcode: __reload_late: CPU3
> microcode: __reload_late: CPU2
> microcode: __reload_late: CPU1
> microcode: __reload_late: CPU0 reloading
> microcode: __reload_late: CPU2 reloading
> microcode: __reload_late: CPU1 reloading
> microcode: __reload_late: CPU3 reloading
> microcode: find_patch: CPU2, NADA

Ok, I think I have it. Please run the patch below, it still has the
debugging output so please paste it here once you've done the exact same
exercise.

It should not explode this time! (Famous last words :-))

Thx!

---
arch/x86/kernel/cpu/microcode/core.c | 11 +++++++----
arch/x86/kernel/cpu/microcode/intel.c | 17 ++++++++++++++---
2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 10c4fc2c91f8..e84877b0f7d7 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -553,6 +553,8 @@ static int __reload_late(void *info)
enum ucode_state err;
int ret = 0;

+ pr_info("%s: CPU%d\n", __func__, cpu);
+
/*
* Wait for all CPUs to arrive. A load will not be attempted unless all
* CPUs show up.
@@ -560,20 +562,21 @@ static int __reload_late(void *info)
if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
return -1;

+ pr_info("%s: CPU%d reloading\n", __func__, cpu);
+
spin_lock(&update_lock);
apply_microcode_local(&err);
spin_unlock(&update_lock);

+ /* siblings return UCODE_OK because their engine got updated already */
if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
- return -1;
- /* siblings return UCODE_OK because their engine got updated already */
} else if (err == UCODE_UPDATED || err == UCODE_OK) {
ret = 1;
- } else {
- return ret;
}

+ pr_info("%s: CPU%d waiting to exit\n", __func__, cpu);
+
/*
* Increase the wait timeout to a safe value here since we're
* serializing the microcode update and that could take a while on a
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 32b8e5724f96..725e0bb6df03 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -485,7 +485,6 @@ static void show_saved_mc(void)
*/
static void save_mc_for_early(u8 *mc, unsigned int size)
{
-#ifdef CONFIG_HOTPLUG_CPU
/* Synchronization during CPU hotplug. */
static DEFINE_MUTEX(x86_cpu_microcode_mutex);

@@ -495,7 +494,6 @@ static void save_mc_for_early(u8 *mc, unsigned int size)
show_saved_mc();

mutex_unlock(&x86_cpu_microcode_mutex);
-#endif
}

static bool load_builtin_intel_microcode(struct cpio_data *cp)
@@ -727,21 +725,32 @@ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci)
{
struct microcode_header_intel *phdr;
struct ucode_patch *iter, *tmp;
+ int cpu = smp_processor_id();

list_for_each_entry_safe(iter, tmp, &microcode_cache, plist) {

phdr = (struct microcode_header_intel *)iter->data;

+ pr_info("%s: CPU%d, phdr: 0x%x, uci: 0x%x\n",
+ __func__, cpu, phdr->rev, uci->cpu_sig.rev);
+
if (phdr->rev <= uci->cpu_sig.rev)
continue;

+ pr_info("%s: CPU%d, find_matching_signature: sig: 0x%x, pf: 0x%x\n",
+ __func__, cpu, uci->cpu_sig.sig, uci->cpu_sig.pf);
+
if (!find_matching_signature(phdr,
uci->cpu_sig.sig,
uci->cpu_sig.pf))
continue;

+
+ pr_info("%s: CPU%d, found phdr: 0x%x\n", __func__, cpu, phdr->rev);
return iter->data;
}
+
+ pr_info("%s: CPU%d, NADA\n", __func__, cpu);
return NULL;
}

@@ -805,8 +814,10 @@ static enum ucode_state apply_microcode_intel(int cpu)
mc = find_patch(uci);
if (!mc) {
mc = uci->mc;
- if (!mc)
+ if (!mc) {
+ pr_info("%s: CPU%d, NFOUND\n", __func__, cpu);
return UCODE_NFOUND;
+ }
}

/*
--
2.13.0

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--