Re: [PATCH v3] thermal: core: Call monitor_thermal_zone() if zone temperature is invalid

From: Rafael J. Wysocki
Date: Mon Jul 15 2024 - 10:48:38 EST


On Mon, Jul 15, 2024 at 2:54 PM Stefan Lippers-Hollmann <s.l-h@xxxxxx> wrote:
>
> Hi
>
> On 2024-07-15, Rafael J. Wysocki wrote:
> > On Mon, Jul 15, 2024 at 11:09 AM Daniel Lezcano
> > <daniel.lezcano@xxxxxxxxxx> wrote:
> > > On 15/07/2024 06:45, Eric Biggers wrote:
> > > > On Thu, Jul 04, 2024 at 01:46:26PM +0200, Rafael J. Wysocki wrote:
> > > >> From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> > > >>
> > > >> Commit 202aa0d4bb53 ("thermal: core: Do not call handle_thermal_trip()
> [...]
> > > Does the following change fixes the messages ?
> > >
> > > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > index 61a4638d1be2..b519db76d402 100644
> > > --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > @@ -622,7 +622,7 @@ static int iwl_mvm_tzone_get_temp(struct
> > > thermal_zone_device *device,
> > >
> > > if (!iwl_mvm_firmware_running(mvm) ||
> > > mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) {
> > > - ret = -ENODATA;
> > > + ret = -EAGAIN;
> > > goto out;
> > > }
> > >
> > >
> > > --
> >
> > It would make the message go away, but it wouldn't stop the useless
> > polling of the dead thermal zone.
>
> Silencing the warnings is already a big improvement - and that patch
> works to this extent for me with an ax200, thanks.

So attached is a patch that should avoid enabling the thermal zone
when it is not ready for use in the first place, so it should address
both the message and the useless polling.

I would appreciate giving it a go (please note that it hasn't received
much testing so far, though).
---
drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 1
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1
drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 55 ++++++++++++++++++++++-----
drivers/thermal/thermal_core.c | 46 ++++++++++++++++++++++
include/linux/thermal.h | 1
5 files changed, 95 insertions(+), 9 deletions(-)

Index: linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
===================================================================
--- linux-pm.orig/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -671,6 +671,7 @@ static void iwl_mvm_thermal_zone_registe
{
int i, ret;
char name[16];
+ struct thermal_zone_device *tzone;
static atomic_t counter = ATOMIC_INIT(0);

if (!iwl_mvm_is_tt_in_fw(mvm)) {
@@ -691,23 +692,40 @@ static void iwl_mvm_thermal_zone_registe
mvm->tz_device.trips[i].type = THERMAL_TRIP_PASSIVE;
mvm->tz_device.trips[i].flags = THERMAL_TRIP_FLAG_RW_TEMP;
}
- mvm->tz_device.tzone = thermal_zone_device_register_with_trips(name,
+
+ tzone = thermal_zone_device_register_with_trips(name,
mvm->tz_device.trips,
IWL_MAX_DTS_TRIPS,
mvm, &tzone_ops,
NULL, 0, 0);
- if (IS_ERR(mvm->tz_device.tzone)) {
+ if (IS_ERR(tzone)) {
IWL_DEBUG_TEMP(mvm,
"Failed to register to thermal zone (err = %ld)\n",
- PTR_ERR(mvm->tz_device.tzone));
- mvm->tz_device.tzone = NULL;
+ PTR_ERR(tzone));
return;
}

- ret = thermal_zone_device_enable(mvm->tz_device.tzone);
+ mutex_lock(&mvm->mutex);
+
+ mvm->tz_device.tzone = tzone;
+
+ mutex_unlock(&mvm->mutex);
+
+ if (!iwl_mvm_firmware_running(mvm)) {
+ IWL_DEBUG_TEMP(mvm, "Thermal zone not ready\n");
+ return;
+ }
+ ret = thermal_zone_device_enable(tzone);
if (ret) {
IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone\n");
- thermal_zone_device_unregister(mvm->tz_device.tzone);
+
+ mutex_lock(&mvm->mutex);
+
+ mvm->tz_device.tzone = NULL;
+
+ mutex_unlock(&mvm->mutex);
+
+ thermal_zone_device_unregister(tzone);
}
}

@@ -787,14 +805,26 @@ static void iwl_mvm_cooling_device_regis

static void iwl_mvm_thermal_zone_unregister(struct iwl_mvm *mvm)
{
+ struct thermal_zone_device *tzone;
+
if (!iwl_mvm_is_tt_in_fw(mvm) || !mvm->tz_device.tzone)
return;

IWL_DEBUG_TEMP(mvm, "Thermal zone device unregister\n");
- if (mvm->tz_device.tzone) {
- thermal_zone_device_unregister(mvm->tz_device.tzone);
- mvm->tz_device.tzone = NULL;
+
+ mutex_lock(&mvm->mutex);
+
+ tzone = mvm->tz_device.tzone;
+ if (!tzone) {
+ mutex_unlock(&mvm->mutex);
+
+ return;
}
+ mvm->tz_device.tzone = NULL;
+
+ mutex_unlock(&mvm->mutex);
+
+ thermal_zone_device_unregister(tzone);
}

static void iwl_mvm_cooling_device_unregister(struct iwl_mvm *mvm)
@@ -847,3 +877,10 @@ void iwl_mvm_thermal_exit(struct iwl_mvm
#endif
mvm->init_status &= ~IWL_MVM_INIT_STATUS_THERMAL_INIT_COMPLETE;
}
+
+void iwl_mvm_thermal_tzone_enable(struct iwl_mvm *mvm)
+{
+#ifdef CONFIG_THERMAL
+ thermal_zone_schedule_device_enable(mvm->tz_device.tzone);
+#endif
+}
Index: linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
===================================================================
--- linux-pm.orig/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -458,6 +458,7 @@ static int iwl_mvm_load_ucode_wait_alive
#ifdef CONFIG_IWLWIFI_DEBUGFS
iwl_fw_set_dbg_rec_on(&mvm->fwrt);
#endif
+ iwl_mvm_thermal_tzone_enable(mvm);

/*
* All the BSSes in the BSS table include the GP2 in the system
Index: linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
===================================================================
--- linux-pm.orig/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -2390,6 +2390,7 @@ void iwl_mvm_temp_notif(struct iwl_mvm *
void iwl_mvm_tt_handler(struct iwl_mvm *mvm);
void iwl_mvm_thermal_initialize(struct iwl_mvm *mvm, u32 min_backoff);
void iwl_mvm_thermal_exit(struct iwl_mvm *mvm);
+void iwl_mvm_thermal_tzone_enable(struct iwl_mvm *mvm);
void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state);
int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp);
void iwl_mvm_ct_kill_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb);
Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -606,6 +606,52 @@ int thermal_zone_device_enable(struct th
}
EXPORT_SYMBOL_GPL(thermal_zone_device_enable);

+struct tz_enable_work {
+ struct work_struct work;
+ struct thermal_zone_device *tz;
+};
+
+static void thermal_zone_enable_work_fn(struct work_struct *work)
+{
+ struct tz_enable_work *ew = container_of(work, struct tz_enable_work, work);
+ struct thermal_zone_device *tz = ew->tz;
+
+ kfree(ew);
+
+ thermal_zone_device_enable(tz);
+
+ put_device(&tz->device);
+}
+
+/**
+ * thermal_zone_schedule_device_enable - Enable thermal zone asynchronously
+ * @tz: Thermal zone to enable.
+ *
+ * Enable a thermal zone in a worker thread.
+ *
+ * The caller must ensure that @tz will not change while this function
+ * is running.
+ */
+int thermal_zone_schedule_device_enable(struct thermal_zone_device *tz)
+{
+ struct tz_enable_work *ew;
+
+ if (!tz)
+ return 0;
+
+ ew = kzalloc(sizeof(*ew), GFP_KERNEL);
+ if (!ew)
+ return -ENOMEM;
+
+ INIT_WORK(&ew->work, thermal_zone_enable_work_fn);
+ get_device(&tz->device);
+ ew->tz = tz;
+ schedule_work(&ew->work);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_schedule_device_enable);
+
int thermal_zone_device_disable(struct thermal_zone_device *tz)
{
return thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED);
Index: linux-pm/include/linux/thermal.h
===================================================================
--- linux-pm.orig/include/linux/thermal.h
+++ linux-pm/include/linux/thermal.h
@@ -275,6 +275,7 @@ bool thermal_trip_is_bound_to_cdev(struc
struct thermal_cooling_device *cdev);

int thermal_zone_device_enable(struct thermal_zone_device *tz);
+int thermal_zone_schedule_device_enable(struct thermal_zone_device *tz);
int thermal_zone_device_disable(struct thermal_zone_device *tz);
void thermal_zone_device_critical(struct thermal_zone_device *tz);
#else