Re: [PATCH 4/7] thermal: intel: hfi: Handle CPU hotplug events

From: Rafael J. Wysocki
Date: Wed Nov 24 2021 - 09:49:17 EST


On Sat, Nov 6, 2021 at 2:34 AM Ricardo Neri
<ricardo.neri-calderon@xxxxxxxxxxxxxxx> wrote:
>
> All CPUs in a package are represented in an HFI table. There exists an
> HFI table per package. Thus, CPUs in a package need to coordinate to
> initialize and access the table. Do such coordination during CPU hotplug.
> Use the first CPU to come online in a package to initialize the HFI table
> and the data structure representing it. Other CPUs in the same package need
> only to register or unregister themselves in that data structure.
>
> The HFI depends on both the package-level thermal management and the local
> APIC thermal local vector. Thus, ensure that both are properly configured
> before calling intel_hfi_online(). The CPU hotplug callbacks of the thermal
> throttle events code already meet these conditions. Enable the HFI from
> thermal_throttle_online().
>
> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Cc: Aubrey Li <aubrey.li@xxxxxxxxxxxxxxx>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
> Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> Cc: "Ravi V. Shankar" <ravi.v.shankar@xxxxxxxxx>
> Reviewed-by: Len Brown <len.brown@xxxxxxxxx>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
> ---
> drivers/thermal/intel/intel_hfi.c | 211 ++++++++++++++++++++++++++++
> drivers/thermal/intel/intel_hfi.h | 4 +
> drivers/thermal/intel/therm_throt.c | 8 ++
> 3 files changed, 223 insertions(+)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index edfe343507b3..6a3adfd57d72 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -21,6 +21,7 @@
>
> #define pr_fmt(fmt) "intel-hfi: " fmt
>
> +#include <linux/io.h>
> #include <linux/slab.h>
>
> #include "intel_hfi.h"
> @@ -52,16 +53,26 @@ struct hfi_hdr {
>
> /**
> * struct hfi_instance - Representation of an HFI instance (i.e., a table)
> + * @table_base: Base of the local copy of the HFI table
> * @ts_counter: Time stamp of the last update of the table
> * @hdr: Base address of the table header
> * @data: Base address of the table data
> + * @die_id: Logical die ID of this HFI table instance
> + * @cpus: CPUs represented in this HFI table instance
> + * @hw_table: Pointer to the HFI table of this instance
> + * @initialized: True if this HFI instance has been initialized
> *
> * A set of parameters to parse and navigate a specific HFI table.
> */
> struct hfi_instance {
> + void *table_base;
> u64 *ts_counter;
> void *hdr;
> void *data;
> + u16 die_id;
> + struct cpumask *cpus;
> + void *hw_table;
> + bool initialized;
> };
>
> /**
> @@ -83,10 +94,210 @@ struct hfi_features {
> bool parsed;
> };
>
> +/**
> + * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
> + * @index: Row of this CPU in its HFI table
> + * @hfi_instance: Attributes of the HFI table to which this CPU belongs
> + *
> + * Parameters to link a logical processor to an HFI table and a row within it.
> + */
> +struct hfi_cpu_info {
> + s16 index;
> + struct hfi_instance *hfi_instance;
> +};
> +
> +static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
> +
> static int max_hfi_instances;
> static struct hfi_instance *hfi_instances;
>
> static struct hfi_features hfi_features;
> +static DEFINE_MUTEX(hfi_lock);
> +
> +static void init_hfi_cpu_index(unsigned int cpu)

I would make this function take a (struct hfi_cpu_info *) argument
instead of the CPU number. It would be more concise then.

> +{
> + s16 hfi_idx;
> + u32 edx;
> +
> + /* Do not re-read @cpu's index if it has already been initialized. */
> + if (per_cpu(hfi_cpu_info, cpu).index > -1)
> + return;
> +
> + edx = cpuid_edx(CPUID_HFI_LEAF);
> + hfi_idx = (edx & CPUID_HFI_CPU_INDEX_MASK) >> CPUID_HFI_CPU_INDEX_SHIFT;
> +
> + per_cpu(hfi_cpu_info, cpu).index = hfi_idx;
> +}
> +
> +/*
> + * The format of the HFI table depends on the number of capabilities that the
> + * hardware supports. Keep a data structure to navigate the table.
> + */
> +static void init_hfi_instance(struct hfi_instance *hfi_instance)
> +{
> + /* The HFI time-stamp is located at the base of the table. */
> + hfi_instance->ts_counter = hfi_instance->table_base;
> +
> + /* The HFI header is below the time-stamp. */
> + hfi_instance->hdr = hfi_instance->table_base +
> + sizeof(*hfi_instance->ts_counter);
> +
> + /* The HFI data starts below the header. */
> + hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
> +}
> +
> +/**
> + * intel_hfi_online() - Enable HFI on @cpu
> + * @cpu: CPU in which the HFI will be enabled
> + *
> + * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
> + * level. The first CPU in the die/package to come online does the full HFI
> + * initialization. Subsequent CPUs will just link themselves to the HFI
> + * instance of their die/package.
> + */
> +void intel_hfi_online(unsigned int cpu)
> +{
> + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
> + u16 die_id = topology_logical_die_id(cpu);
> + struct hfi_instance *hfi_instance;
> + phys_addr_t hw_table_pa;
> + u64 msr_val;
> +
> + if (!boot_cpu_has(X86_FEATURE_INTEL_HFI))
> + return;

IMO it is not useful to do anything below in this function if
hfi_instances is NULL, so I would check it along with the above.

> +
> + init_hfi_cpu_index(cpu);
> +
> + /*
> + * The HFI instance of this @cpu may exist already but it has not
> + * been linked to @cpu.
> + */
> + hfi_instance = info->hfi_instance;
> + if (!hfi_instance) {
> + if (!hfi_instances)
> + return;
> +
> + if (die_id >= 0 && die_id < max_hfi_instances)
> + hfi_instance = &hfi_instances[die_id];
> +
> + if (!hfi_instance)
> + return;

And here I would do

        if (die_id < 0 || die_id >= max_hfi_instances)
                return;

        hfi_instance = &hfi_instances[die_id];

which is one branch less and fewer LOC.

> + }
> +
> + /*
> + * Now check if the HFI instance of the package/die of this CPU has
> + * been initialized. In such case, all we have to do is link @cpu's info
> + * to the HFI instance of its die/package.
> + */
> + mutex_lock(&hfi_lock);
> + if (hfi_instance->initialized) {
> + info->hfi_instance = hfi_instance;
> +
> + /*
> + * @cpu is the first one in its die/package to come back online.
> + * Use it to track the CPUs in the die/package.
> + */
> + if (!hfi_instance->cpus)
> + hfi_instance->cpus = topology_core_cpumask(cpu);
> +
> + mutex_unlock(&hfi_lock);
> + return;
> + }
> +
> + /*
> + * Hardware is programmed with the physical address of the first page
> + * frame of the table. Hence, the allocated memory must be page-aligned.
> + */
> + hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
> + GFP_KERNEL | __GFP_ZERO);
> + if (!hfi_instance->hw_table)
> + goto err_out;
> +
> + hw_table_pa = virt_to_phys(hfi_instance->hw_table);
> +
> + hfi_instance->table_base = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
> + GFP_KERNEL);
> + if (!hfi_instance->table_base)
> + goto free_hw_table;
> +
> + /*
> + * Program the address of the feedback table of this die/package. On
> + * some processors, hardware remembers the old address of the HFI table
> + * even after having been reprogrammed and re-enabled. Thus, do not free
> + * pages allocated for the table or reprogram the hardware with a new
> + * base address. Namely, program the hardware only once.
> + */
> + msr_val = hw_table_pa | HFI_PTR_VALID_BIT;
> + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
> +
> + init_hfi_instance(hfi_instance);
> +
> + hfi_instance->die_id = die_id;
> +
> + /*
> + * We can use the core cpumask of any cpu in the die/package. Any of
> + * them will reflect all the CPUs in the same package that are online.
> + */
> + hfi_instance->cpus = topology_core_cpumask(cpu);
> + info->hfi_instance = hfi_instance;
> + hfi_instance->initialized = true;
> +
> + mutex_unlock(&hfi_lock);
> +
> + return;
> +
> +free_hw_table:
> + free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
> +err_out:
> + mutex_unlock(&hfi_lock);
> +}
> +
> +/**
> + * intel_hfi_offline() - Disable HFI on @cpu
> + * @cpu: CPU in which the HFI will be disabled
> + *
> + * Remove @cpu from those covered by its HFI instance.
> + *
> + * On some processors, hardware remembers previous programming settings even
> + * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
> + * die/package of @cpu are offline. See note in intel_hfi_online().
> + */
> +void intel_hfi_offline(unsigned int cpu)
> +{
> + struct cpumask *die_cpumask = topology_core_cpumask(cpu);
> + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
> + struct hfi_instance *hfi_instance;
> +
> + if (!boot_cpu_has(X86_FEATURE_INTEL_HFI))
> + return;
> +
> + hfi_instance = info->hfi_instance;
> + if (!hfi_instance)
> + return;
> +
> + if (!hfi_instance->initialized)
> + return;
> +
> + mutex_lock(&hfi_lock);
> +
> + /*
> + * We were using the core cpumask of @cpu to track CPUs in the same
> + * die/package. Now it is going offline and we need to find another
> + * CPU we can use.
> + */
> + if (die_cpumask == hfi_instance->cpus) {
> + int new_cpu;
> +
> + new_cpu = cpumask_any_but(hfi_instance->cpus, cpu);
> + if (new_cpu >= nr_cpu_ids)
> + /* All other CPUs in the package are offline. */
> + hfi_instance->cpus = NULL;
> + else
> + hfi_instance->cpus = topology_core_cpumask(new_cpu);

Hmmm. Is topology_core_cpumask() updated when CPUs go offline and online?

> + }
> +
> + mutex_unlock(&hfi_lock);
> +}
>
> static __init int hfi_parse_features(void)
> {
> diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
> index 42529d3ac92d..d87c3823bb76 100644
> --- a/drivers/thermal/intel/intel_hfi.h
> +++ b/drivers/thermal/intel/intel_hfi.h
> @@ -27,8 +27,12 @@
>
> #if defined(CONFIG_INTEL_HFI)
> void __init intel_hfi_init(void);
> +void intel_hfi_online(unsigned int cpu);
> +void intel_hfi_offline(unsigned int cpu);
> #else
> static inline void intel_hfi_init(void) { }
> +static inline void intel_hfi_online(unsigned int cpu) { }
> +static inline void intel_hfi_offline(unsigned int cpu) { }
> #endif
>
> #endif /* _INTEL_HFI_H */
> diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
> index ac408714d52b..2a79598a7f7a 100644
> --- a/drivers/thermal/intel/therm_throt.c
> +++ b/drivers/thermal/intel/therm_throt.c
> @@ -480,6 +480,12 @@ static int thermal_throttle_online(unsigned int cpu)
> l = apic_read(APIC_LVTTHMR);
> apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
>
> + /*
> + * Enable the package-level HFI interrupt. By now the local APIC is
> + * ready to get thermal interrupts.
> + */
> + intel_hfi_online(cpu);
> +
> return thermal_throttle_add_dev(dev, cpu);
> }
>
> @@ -489,6 +495,8 @@ static int thermal_throttle_offline(unsigned int cpu)
> struct device *dev = get_cpu_device(cpu);
> u32 l;
>
> + intel_hfi_offline(cpu);
> +
> /* Mask the thermal vector before draining evtl. pending work */
> l = apic_read(APIC_LVTTHMR);
> apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
> --
> 2.17.1
>