Re: Early boot regression from f0551af0213 ("x86/topology: Ignore non-present APIC IDs in a present package")

From: Thomas Gleixner
Date: Wed May 22 2024 - 18:13:01 EST


Lyude!

On Wed, May 22 2024 at 15:35, Lyude Paul wrote:

Thank for testing!

> Awesome! This patch does seem to make the system boot, thank you for
> your help

The only thing what's awesome here is that it confirms my analysis of
the underlying problem. I offered Borislav a bet on that, but he
politely declined :(

The not so awesome part is the question what to do with that insight.

The first issue is that we don't know whether that's only a problem on
your particular system or if there is an underlying systematic problem
on that particular CPU variant (model/stepping).

Unless the AMD folks can give an authoritative answer we have three options:

1) Targeted via quirk

As you are so far the only one complaining about this, it might be
sufficient to enforce the physical flat mode for your particular
machine via a DMI quirk or on the actual CPU model/stepping.

2) Tie it to interrupt remapping

That's the patch I provided you for testing

3) Remove the default logical destination mode on 64bit completely

My favourite

#1 is stupid IMO because it's likely that other systems are affected by
this nonsense and I don't want to end up adding quirks over and over

#2 is silly because it effectively enforces physical destination mode on
any system which has interrupt remapping available in hardware.

That's pretty much everything halfways modern.

#3 makes a lot of sense because:

- it reduces the amount of code

Given the trend of the last decade this actually removes code which
will be used less frequently as the number of logical CPUs keeps
increasing.

- the only benefit of logical destination mode over physical
destination mode is the ability to send IPIs to multiple CPUs in
one operation.

The question is whether this still matters.

IMO it does not matter because anything which is IPI sensitive is
running on machines which have more than 8 CPUs today. The time
where 8 CPU (threads) workstations and servers were state of the
art are long gone.

- physical destination mode is guaranteed to work because it's the
only way to get a CPU up and running via the INIT/INIT/STARTUP
sequence, while obvioulsy logical destination mode has its issues
not only on the system at hand (see physflat_acpi_madt_oem_check()).

Patch for this below.

Thanks,

tglx
---
arch/x86/kernel/apic/apic_flat_64.c | 116 ------------------------------------
1 file changed, 3 insertions(+), 113 deletions(-)

--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -18,126 +18,19 @@
#include "local.h"

static struct apic apic_physflat;
-static struct apic apic_flat;

-struct apic *apic __ro_after_init = &apic_flat;
+struct apic *apic __ro_after_init = &apic_phys_flat;
EXPORT_SYMBOL_GPL(apic);

-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 1;
-}
-
-static void _flat_send_IPI_mask(unsigned long mask, int vector)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
- local_irq_restore(flags);
-}
-
-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
-
- _flat_send_IPI_mask(mask, vector);
-}
-
-static void
-flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
- int cpu = smp_processor_id();
-
- if (cpu < BITS_PER_LONG)
- __clear_bit(cpu, &mask);
-
- _flat_send_IPI_mask(mask, vector);
-}
-
-static u32 flat_get_apic_id(u32 x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static int flat_probe(void)
-{
- return 1;
-}
-
-static struct apic apic_flat __ro_after_init = {
- .name = "flat",
- .probe = flat_probe,
- .acpi_madt_oem_check = flat_acpi_madt_oem_check,
-
- .dest_mode_logical = true,
-
- .disable_esr = 0,
-
- .init_apic_ldr = default_init_apic_ldr,
- .cpu_present_to_apicid = default_cpu_present_to_apicid,
-
- .max_apic_id = 0xFE,
- .get_apic_id = flat_get_apic_id,
-
- .calc_dest_apicid = apic_flat_calc_apicid,
-
- .send_IPI = default_send_IPI_single,
- .send_IPI_mask = flat_send_IPI_mask,
- .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
- .send_IPI_allbutself = default_send_IPI_allbutself,
- .send_IPI_all = default_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
- .nmi_to_offline_cpu = true,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi = native_apic_mem_eoi,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = apic_mem_wait_icr_idle,
- .safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout,
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on a system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
-static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-#ifdef CONFIG_ACPI
- /*
- * Quirk: some x86_64 machines can only use physical APIC mode
- * regardless of how many processors are present (x86_64 ES7000
- * is an example).
- */
- if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
- printk(KERN_DEBUG "system APIC only can use physical flat");
- return 1;
- }
-
- if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
- printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
- return 1;
- }
-#endif
-
- return 0;
-}
-
static int physflat_probe(void)
{
- return apic == &apic_physflat || num_possible_cpus() > 8 || jailhouse_paravirt();
+ return 1;
}

static struct apic apic_physflat __ro_after_init = {

.name = "physical flat",
.probe = physflat_probe,
- .acpi_madt_oem_check = physflat_acpi_madt_oem_check,

.dest_mode_logical = false,

@@ -167,7 +60,4 @@ static struct apic apic_physflat __ro_af
.safe_wait_icr_idle = apic_mem_wait_icr_idle_timeout,
};

-/*
- * We need to check for physflat first, so this order is important.
- */
-apic_drivers(apic_physflat, apic_flat);
+apic_drivers(apic_physflat);