Re: 4.15.17 regression: bisected: timeout during microcode update
From: Vitezslav Samel
Date: Thu Apr 19 2018 - 09:46:34 EST
On Thu, Apr 19, 2018 at 02:18:41PM +0200, Borislav Petkov wrote:
> On Thu, Apr 19, 2018 at 02:02:39PM +0200, Vitezslav Samel wrote:
> > Here it is:
>
> Thanks!
>
> > -------------------------------------------------------------
> > microcode: __reload_late: CPU1
> > microcode: __reload_late: CPU3
> > microcode: __reload_late: CPU2
> > microcode: __reload_late: CPU0
> > microcode: __reload_late: CPU1 reloading
> > microcode: __reload_late: CPU3 reloading
> > microcode: __reload_late: CPU2 reloading
> > microcode: __reload_late: CPU0 reloading
> > microcode: __reload_late: CPU3 returning 0x0
>
> Aaaha, there it is.
>
> Ok, debug patch v2, please do the same.
Here it is:
------------------------------------------------------------
microcode: __reload_late: CPU0
microcode: __reload_late: CPU3
microcode: __reload_late: CPU2
microcode: __reload_late: CPU1
microcode: __reload_late: CPU0 reloading
microcode: __reload_late: CPU2 reloading
microcode: __reload_late: CPU1 reloading
microcode: __reload_late: CPU3 reloading
microcode: find_patch: CPU2, NADA
microcode: apply_microcode_intel: CPU2, NFOUND
microcode: __reload_late: CPU2 returning 0x3
microcode: find_patch: CPU1, NADA
microcode: apply_microcode_intel: CPU1, NFOUND
microcode: __reload_late: CPU1 returning 0x3
microcode: find_patch: CPU3, NADA
microcode: apply_microcode_intel: CPU3, NFOUND
microcode: __reload_late: CPU3 returning 0x3
microcode: find_patch: CPU0, NADA
microcode: updated to revision 0x24, date = 2018-01-21
microcode: __reload_late: CPU0 waiting to exit
microcode: Timeout while waiting for CPUs rendezvous, remaining: 3
Kernel panic - not syncing: Timeout during microcode update!
CPU: 0 PID: 11 Comm: migration/0 Not tainted 4.16.3 #1
Hardware name: Supermicro X10SLM-F/X10SLM-F, BIOS 2.2 02/05/2015
Call Trace:
dump_stack+0x46/0x65
panic+0xca/0x208
__reload_late+0x122/0x130
multi_cpu_stop+0x55/0xa0
? cpu_stop_queue_work+0x80/0x80
cpu_stopper_thread+0x7d/0x100
? sort_range+0x20/0x20
smpboot_thread_fn+0x11f/0x1e0
kthread+0x101/0x120
? __kthread_create_on_node+0x150/0x150
? __kthread_create_on_node+0x40/0x150
ret_from_fork+0x35/0x40
Shutting down cpus with NMI
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Timeout during microcode update!
------------------------------------------------------------
Cheers,
Vita
>
> Thx.
>
> ---
> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index 10c4fc2c91f8..737f93422d0b 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -553,6 +553,8 @@ static int __reload_late(void *info)
> enum ucode_state err;
> int ret = 0;
>
> + pr_info("%s: CPU%d\n", __func__, cpu);
> +
> /*
> * Wait for all CPUs to arrive. A load will not be attempted unless all
> * CPUs show up.
> @@ -560,6 +562,8 @@ static int __reload_late(void *info)
> if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
> return -1;
>
> + pr_info("%s: CPU%d reloading\n", __func__, cpu);
> +
> spin_lock(&update_lock);
> apply_microcode_local(&err);
> spin_unlock(&update_lock);
> @@ -571,9 +575,12 @@ static int __reload_late(void *info)
> } else if (err == UCODE_UPDATED || err == UCODE_OK) {
> ret = 1;
> } else {
> + pr_info("%s: CPU%d returning 0x%x\n", __func__, cpu, err);
> return ret;
> }
>
> + pr_info("%s: CPU%d waiting to exit\n", __func__, cpu);
> +
> /*
> * Increase the wait timeout to a safe value here since we're
> * serializing the microcode update and that could take a while on a
> diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
> index 32b8e5724f96..ad0a19154c40 100644
> --- a/arch/x86/kernel/cpu/microcode/intel.c
> +++ b/arch/x86/kernel/cpu/microcode/intel.c
> @@ -727,21 +727,32 @@ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci)
> {
> struct microcode_header_intel *phdr;
> struct ucode_patch *iter, *tmp;
> + int cpu = smp_processor_id();
>
> list_for_each_entry_safe(iter, tmp, µcode_cache, plist) {
>
> phdr = (struct microcode_header_intel *)iter->data;
>
> + pr_info("%s: CPU%d, phdr: 0x%x, uci: 0x%x\n",
> + __func__, cpu, phdr->rev, uci->cpu_sig.rev);
> +
> if (phdr->rev <= uci->cpu_sig.rev)
> continue;
>
> + pr_info("%s: CPU%d, find_matching_signature: sig: 0x%x, pf: 0x%x\n",
> + __func__, cpu, uci->cpu_sig.sig, uci->cpu_sig.pf);
> +
> if (!find_matching_signature(phdr,
> uci->cpu_sig.sig,
> uci->cpu_sig.pf))
> continue;
>
> +
> + pr_info("%s: CPU%d, found phdr: 0x%x\n", __func__, cpu, phdr->rev);
> return iter->data;
> }
> +
> + pr_info("%s: CPU%d, NADA\n", __func__, cpu);
> return NULL;
> }
>
> @@ -805,8 +816,10 @@ static enum ucode_state apply_microcode_intel(int cpu)
> mc = find_patch(uci);
> if (!mc) {
> mc = uci->mc;
> - if (!mc)
> + if (!mc) {
> + pr_info("%s: CPU%d, NFOUND\n", __func__, cpu);
> return UCODE_NFOUND;
> + }
> }
>
> /*
>
> --
> Regards/Gruss,
> Boris.
>
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
> --