Re: [PATCH 2/4] thermal: intel: hfi: Tune the HFI thermal netlink event delay via debugfs

From: Zhang, Rui
Date: Tue Apr 30 2024 - 00:53:07 EST


On Mon, 2024-04-29 at 16:41 -0700, Ricardo Neri wrote:
> THe delay between an HFI interrupt and its corresponding thermal

s/THe/The

> netlink
> event has so far been hard-coded to CONFIG_HZ jiffies. This may not
> suit
> the needs of all hardware configurations or listeners of events.
>
> If the update delay is too long, much of the information of
> consecutive
> hardware updates will be lost as the HFI delayed workqueue posts a
> new
> thermal netlink event only when there are no previous pending events.
> If
> the delay is too short, multiple, events may overwhelm listeners.
>
> Listeners are better placed to determine the delay of events. They
> know how
> quickly they can act on them effectively. They may also want to
> experiment
> with different values.
>
> Start a debugfs interface to tune the notification delay.

Why this is implemented as debugfs rather than a module param?

thanks,
rui
>
> Keep things simple and do not add extra locking or memory barriers.
> This
> may result in the HFI interrupt ocassionally queueing work using
> stale
> delay values, if at all. This should not be a problem: the debugfs
> file
> will update the delay value atomically, we do not expect users to
> update
> the delay value frequently, and the delayed workqueue operates in
> jiffies
> resolution.
>
> Suggested-by: Srinivas Pandruvada
> <srinivas.pandruvada@xxxxxxxxxxxxxxx>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
> ---
> Cc: Len Brown <len.brown@xxxxxxxxx>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
> Cc: Stanislaw Gruszka <stanislaw.gruszka@xxxxxxxxxxxxxxx>
> Cc: Zhang Rui <rui.zhang@xxxxxxxxx>
> Cc: linux-pm@xxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> ---
>  drivers/thermal/intel/intel_hfi.c | 77
> ++++++++++++++++++++++++++++++-
>  1 file changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c
> b/drivers/thermal/intel/intel_hfi.c
> index e2b82d71ab6b..24d708268c68 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -43,6 +43,10 @@
>  
>  #include <asm/msr.h>
>  
> +#ifdef CONFIG_DEBUG_FS
> +#include <linux/debugfs.h>
> +#endif
> +
>  #include "intel_hfi.h"
>  #include "thermal_interrupt.h"
>  
> @@ -169,6 +173,70 @@ static struct workqueue_struct *hfi_updates_wq;
>  #define HFI_UPDATE_DELAY               HZ
>  #define HFI_MAX_THERM_NOTIFY_COUNT     16
>  
> +/* Keep this variable 8-byte aligned to get atomic accesses. */
> +static unsigned long hfi_update_delay = HFI_UPDATE_DELAY;
> +
> +#ifdef CONFIG_DEBUG_FS
> +static int hfi_update_delay_get(void *data, u64 *val)
> +{
> +       mutex_lock(&hfi_instance_lock);
> +       *val = jiffies_to_msecs(hfi_update_delay);
> +       mutex_unlock(&hfi_instance_lock);
> +       return 0;
> +}
> +
> +static int hfi_update_delay_set(void *data, u64 val)
> +{
> +       /*
> +        * The mutex only serializes access to the debugfs file.
> +        *
> +        * hfi_process_event() loads @hfi_update_delay from interrupt
> context.
> +        * We could have serialized accesses with a spinlock or a
> memory barrier.
> +        * But this is a debug feature, the store of
> @hfi_update_delay is
> +        * atomic, and will seldom change. A few loads of
> @hfi_update_delay may
> +        * see stale values but the updated value will be seen
> eventually.
> +        */
> +       mutex_lock(&hfi_instance_lock);
> +       hfi_update_delay = msecs_to_jiffies(val);
> +       mutex_unlock(&hfi_instance_lock);
> +       return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(hfi_update_delay_fops,
> hfi_update_delay_get,
> +                        hfi_update_delay_set, "%llu\n");
> +
> +static struct dentry *hfi_debugfs_dir;
> +
> +static void hfi_debugfs_unregister(void)
> +{
> +       debugfs_remove_recursive(hfi_debugfs_dir);
> +       hfi_debugfs_dir = NULL;
> +}
> +
> +static void hfi_debugfs_register(void)
> +{
> +       struct dentry *f;
> +
> +       hfi_debugfs_dir = debugfs_create_dir("intel_hfi", NULL);
> +       if (!hfi_debugfs_dir)
> +               return;
> +
> +       f = debugfs_create_file("update_delay_ms", 0644,
> hfi_debugfs_dir,
> +                               NULL, &hfi_update_delay_fops);
> +       if (!f)
> +               goto err;
> +
> +       return;
> +err:
> +       hfi_debugfs_unregister();
> +}
> +
> +#else
> +static void hfi_debugfs_register(void)
> +{
> +}
> +#endif /* CONFIG_DEBUG_FS */
> +
>  static void get_hfi_caps(struct hfi_instance *hfi_instance,
>                          struct thermal_genl_cpu_caps *cpu_caps)
>  {
> @@ -321,8 +389,13 @@ void intel_hfi_process_event(__u64
> pkg_therm_status_msr_val)
>         raw_spin_unlock(&hfi_instance->table_lock);
>         raw_spin_unlock(&hfi_instance->event_lock);
>  
> +       /*
> +        * debugfs may atomically store @hfi_update_delay without
> locking. The
> +        * updated value may not be immediately observed. See note in
> +        * hfi_update_delay_set().
> +        */
>         queue_delayed_work(hfi_updates_wq, &hfi_instance-
> >update_work,
> -                          HFI_UPDATE_DELAY);
> +                          hfi_update_delay);
>  }
>  
>  static void init_hfi_cpu_index(struct hfi_cpu_info *info)
> @@ -708,6 +781,8 @@ void __init intel_hfi_init(void)
>  
>         register_syscore_ops(&hfi_pm_ops);
>  
> +       hfi_debugfs_register();
> +
>         return;
>  
>  err_nl_notif: