Re: [PATCH 4/4] thermal: intel: hfi: Add a suspend notifier

From: Rafael J. Wysocki
Date: Fri Dec 29 2023 - 12:27:54 EST


On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri
<ricardo.neri-calderon@xxxxxxxxxxxxxxx> wrote:
>
> The kernel gives the HFI hardware a memory region that the latter uses to
> provide updates to the HFI table. The kernel allocates this memory region
> at boot. It remains constant throughout runtime time.
>
> When resuming from suspend or hibernation, the restore kernel allocates a
> second memory buffer and reprograms the HFI hardware with the new location
> as part of a normal boot. The location of the second memory buffer may
> differ from the one allocated by the image kernel. Subsequently, when the
> restore kernel transfers control to the image kernel, the second buffer
> becomes invalid, potentially leading to memory corruption if the hardware
> writes to it (hardware continues using the buffer from the restore kernel).
>
> Add a suspend notifier to disable all HFI instances before jumping to the
> image kernel and enable them once the image kernel has been restored. Use
> the memory buffer that the image kernel allocated.
>
> For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled
> and enabled during suspend and resume, respectively.
>
> The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI
> instance of the boot CPU from the suspend notifier callback.
>
> Cc: Chen Yu <yu.c.chen@xxxxxxxxx>
> Cc: Len Brown <len.brown@xxxxxxxxx>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
> Cc: Stanislaw Gruszka <stanislaw.gruszka@xxxxxxxxxxxxxxx>
> Cc: Zhang Rui <rui.zhang@xxxxxxxxx>
> Cc: Zhao Liu <zhao1.liu@xxxxxxxxxxxxxxx>
> Cc: linux-pm@xxxxxxxxxxxxxxx
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
> ---
> drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++
> 1 file changed, 53 insertions(+)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index d2c874f43786..965c245e5e78 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -30,11 +30,13 @@
> #include <linux/kernel.h>
> #include <linux/math.h>
> #include <linux/mutex.h>
> +#include <linux/notifier.h>
> #include <linux/percpu-defs.h>
> #include <linux/printk.h>
> #include <linux/processor.h>
> #include <linux/slab.h>
> #include <linux/spinlock.h>
> +#include <linux/suspend.h>
> #include <linux/string.h>
> #include <linux/topology.h>
> #include <linux/workqueue.h>
> @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void)
> return 0;
> }
>
> +static void hfi_do_pm_enable(void *info)
> +{
> + struct hfi_instance *hfi_instance = info;
> +
> + hfi_set_hw_table(hfi_instance);
> + hfi_enable();

The above do RMW, so should locking be used here?

> +}
> +
> +static void hfi_do_pm_disable(void *info)
> +{
> + hfi_disable();
> +}

And here?

> +
> +static int hfi_pm_notify(struct notifier_block *nb,
> + unsigned long mode, void *unused)
> +{
> + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
> + struct hfi_instance *hfi_instance = info->hfi_instance;
> +
> + /* HFI may not be in use. */
> + if (!hfi_instance)
> + return 0;
> +
> + /*
> + * Only handle the HFI instance of the package of the boot CPU. The
> + * instances of other packages are handled in the CPU hotplug callbacks.
> + */
> + switch (mode) {
> + case PM_HIBERNATION_PREPARE:
> + case PM_SUSPEND_PREPARE:
> + case PM_RESTORE_PREPARE:
> + return smp_call_function_single(0, hfi_do_pm_disable,
> + NULL, true);
> +
> + case PM_POST_RESTORE:
> + case PM_POST_HIBERNATION:
> + case PM_POST_SUSPEND:
> + return smp_call_function_single(0, hfi_do_pm_enable,
> + hfi_instance, true);
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +static struct notifier_block hfi_pm_nb = {
> + .notifier_call = hfi_pm_notify,
> +};
> +
> void __init intel_hfi_init(void)
> {
> struct hfi_instance *hfi_instance;
> int i, j;
>
> + if (register_pm_notifier(&hfi_pm_nb))
> + return;
> +
> if (hfi_parse_features())
> return;
>
> --