Re: 4.15.17 regression: bisected: timeout during microcode update

From: Vitezslav Samel
Date: Fri Apr 20 2018 - 02:20:34 EST


On Thu, Apr 19, 2018 at 06:37:34PM +0200, Borislav Petkov wrote:
> On Thu, Apr 19, 2018 at 03:46:27PM +0200, Vitezslav Samel wrote:
> > ------------------------------------------------------------
> > microcode: __reload_late: CPU0
> > microcode: __reload_late: CPU3
> > microcode: __reload_late: CPU2
> > microcode: __reload_late: CPU1
> > microcode: __reload_late: CPU0 reloading
> > microcode: __reload_late: CPU2 reloading
> > microcode: __reload_late: CPU1 reloading
> > microcode: __reload_late: CPU3 reloading
> > microcode: find_patch: CPU2, NADA
>
> Ok, I think I have it. Please run the patch below, it still has the
> debugging output so please paste it here once you've done the exact same
> exercise.
>
> It should not explode this time! (Famous last words :-))

;-) This time it works.

(Ashok: all test were against stable 4.16.3)

------------------------------------------------------
microcode: __reload_late: CPU1
microcode: __reload_late: CPU0
microcode: __reload_late: CPU2
microcode: __reload_late: CPU3
microcode: __reload_late: CPU1 reloading
microcode: __reload_late: CPU0 reloading
microcode: __reload_late: CPU3 reloading
microcode: __reload_late: CPU2 reloading
microcode: find_patch: CPU0, phdr: 0x24, uci: 0x1c
microcode: find_patch: CPU0, find_matching_signature: sig: 0x306c3, pf: 0x2
microcode: find_patch: CPU0, found phdr: 0x24
microcode: updated to revision 0x24, date = 2018-01-21
microcode: __reload_late: CPU0 waiting to exit
microcode: find_patch: CPU3, phdr: 0x24, uci: 0x1c
microcode: find_patch: CPU3, find_matching_signature: sig: 0x306c3, pf: 0x2
microcode: find_patch: CPU3, found phdr: 0x24
microcode: __reload_late: CPU3 waiting to exit
microcode: find_patch: CPU2, phdr: 0x24, uci: 0x1c
microcode: find_patch: CPU2, find_matching_signature: sig: 0x306c3, pf: 0x2
microcode: find_patch: CPU2, found phdr: 0x24
microcode: __reload_late: CPU2 waiting to exit
microcode: find_patch: CPU1, phdr: 0x24, uci: 0x1c
microcode: find_patch: CPU1, find_matching_signature: sig: 0x306c3, pf: 0x2
microcode: find_patch: CPU1, found phdr: 0x24
microcode: __reload_late: CPU1 waiting to exit
x86/CPU: CPU features have changed after loading microcode, but might not take effect.
x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.
------------------------------------------------------

Thank you very much,

Vita


> Thx!
>
> ---
> arch/x86/kernel/cpu/microcode/core.c | 11 +++++++----
> arch/x86/kernel/cpu/microcode/intel.c | 17 ++++++++++++++---
> 2 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index 10c4fc2c91f8..e84877b0f7d7 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -553,6 +553,8 @@ static int __reload_late(void *info)
> enum ucode_state err;
> int ret = 0;
>
> + pr_info("%s: CPU%d\n", __func__, cpu);
> +
> /*
> * Wait for all CPUs to arrive. A load will not be attempted unless all
> * CPUs show up.
> @@ -560,20 +562,21 @@ static int __reload_late(void *info)
> if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
> return -1;
>
> + pr_info("%s: CPU%d reloading\n", __func__, cpu);
> +
> spin_lock(&update_lock);
> apply_microcode_local(&err);
> spin_unlock(&update_lock);
>
> + /* siblings return UCODE_OK because their engine got updated already */
> if (err > UCODE_NFOUND) {
> pr_warn("Error reloading microcode on CPU %d\n", cpu);
> - return -1;
> - /* siblings return UCODE_OK because their engine got updated already */
> } else if (err == UCODE_UPDATED || err == UCODE_OK) {
> ret = 1;
> - } else {
> - return ret;
> }
>
> + pr_info("%s: CPU%d waiting to exit\n", __func__, cpu);
> +
> /*
> * Increase the wait timeout to a safe value here since we're
> * serializing the microcode update and that could take a while on a
> diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
> index 32b8e5724f96..725e0bb6df03 100644
> --- a/arch/x86/kernel/cpu/microcode/intel.c
> +++ b/arch/x86/kernel/cpu/microcode/intel.c
> @@ -485,7 +485,6 @@ static void show_saved_mc(void)
> */
> static void save_mc_for_early(u8 *mc, unsigned int size)
> {
> -#ifdef CONFIG_HOTPLUG_CPU
> /* Synchronization during CPU hotplug. */
> static DEFINE_MUTEX(x86_cpu_microcode_mutex);
>
> @@ -495,7 +494,6 @@ static void save_mc_for_early(u8 *mc, unsigned int size)
> show_saved_mc();
>
> mutex_unlock(&x86_cpu_microcode_mutex);
> -#endif
> }
>
> static bool load_builtin_intel_microcode(struct cpio_data *cp)
> @@ -727,21 +725,32 @@ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci)
> {
> struct microcode_header_intel *phdr;
> struct ucode_patch *iter, *tmp;
> + int cpu = smp_processor_id();
>
> list_for_each_entry_safe(iter, tmp, &microcode_cache, plist) {
>
> phdr = (struct microcode_header_intel *)iter->data;
>
> + pr_info("%s: CPU%d, phdr: 0x%x, uci: 0x%x\n",
> + __func__, cpu, phdr->rev, uci->cpu_sig.rev);
> +
> if (phdr->rev <= uci->cpu_sig.rev)
> continue;
>
> + pr_info("%s: CPU%d, find_matching_signature: sig: 0x%x, pf: 0x%x\n",
> + __func__, cpu, uci->cpu_sig.sig, uci->cpu_sig.pf);
> +
> if (!find_matching_signature(phdr,
> uci->cpu_sig.sig,
> uci->cpu_sig.pf))
> continue;
>
> +
> + pr_info("%s: CPU%d, found phdr: 0x%x\n", __func__, cpu, phdr->rev);
> return iter->data;
> }
> +
> + pr_info("%s: CPU%d, NADA\n", __func__, cpu);
> return NULL;
> }
>
> @@ -805,8 +814,10 @@ static enum ucode_state apply_microcode_intel(int cpu)
> mc = find_patch(uci);
> if (!mc) {
> mc = uci->mc;
> - if (!mc)
> + if (!mc) {
> + pr_info("%s: CPU%d, NFOUND\n", __func__, cpu);
> return UCODE_NFOUND;
> + }
> }
>
> /*
> --
> 2.13.0
>
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
> --