Re: [PATCH v3] thermal: core: Call monitor_thermal_zone() if zone temperature is invalid
From: Rafael J. Wysocki
Date: Tue Jul 16 2024 - 06:05:54 EST
On Tue, Jul 16, 2024 at 1:48 AM Stefan Lippers-Hollmann <s.l-h@xxxxxx> wrote:
>
> Hi
>
> On 2024-07-15, Rafael J. Wysocki wrote:
> > On Mon, Jul 15, 2024 at 2:54 PM Stefan Lippers-Hollmann <s.l-h@xxxxxx> wrote:
> > > On 2024-07-15, Rafael J. Wysocki wrote:
> > > > On Mon, Jul 15, 2024 at 11:09 AM Daniel Lezcano
> > > > <daniel.lezcano@xxxxxxxxxx> wrote:
> > > > > On 15/07/2024 06:45, Eric Biggers wrote:
> > > > > > On Thu, Jul 04, 2024 at 01:46:26PM +0200, Rafael J. Wysocki wrote:
> > > > > >> From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> [...]
> > > Silencing the warnings is already a big improvement - and that patch
> > > works to this extent for me with an ax200, thanks.
> >
> > So attached is a patch that should avoid enabling the thermal zone
> > when it is not ready for use in the first place, so it should address
> > both the message and the useless polling.
> >
> > I would appreciate giving it a go (please note that it hasn't received
> > much testing so far, though).
>
> Sadly this patch doesn't seem to help:
This is likely because it is missing checks for firmware image type.
I've added them to the attached new version. Please try it.
I've also added two pr_info() messages to get a better idea of what's
going on, so please grep dmesg for "Thermal zone not ready" and
"Enabling thermal zone".
In the meantime, I'll prepare thermal core changes that should
mitigate the problem independently.
---
drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1
drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 61 +++++++++++++++++++++++----
drivers/thermal/thermal_core.c | 48 +++++++++++++++++++++
include/linux/thermal.h | 1
5 files changed, 104 insertions(+), 9 deletions(-)
Index: linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
===================================================================
--- linux-pm.orig/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -671,6 +671,7 @@ static void iwl_mvm_thermal_zone_registe
{
int i, ret;
char name[16];
+ struct thermal_zone_device *tzone;
static atomic_t counter = ATOMIC_INIT(0);
if (!iwl_mvm_is_tt_in_fw(mvm)) {
@@ -691,23 +692,46 @@ static void iwl_mvm_thermal_zone_registe
mvm->tz_device.trips[i].type = THERMAL_TRIP_PASSIVE;
mvm->tz_device.trips[i].flags = THERMAL_TRIP_FLAG_RW_TEMP;
}
- mvm->tz_device.tzone = thermal_zone_device_register_with_trips(name,
+
+ tzone = thermal_zone_device_register_with_trips(name,
mvm->tz_device.trips,
IWL_MAX_DTS_TRIPS,
mvm, &tzone_ops,
NULL, 0, 0);
- if (IS_ERR(mvm->tz_device.tzone)) {
+ if (IS_ERR(tzone)) {
IWL_DEBUG_TEMP(mvm,
"Failed to register to thermal zone (err = %ld)\n",
- PTR_ERR(mvm->tz_device.tzone));
- mvm->tz_device.tzone = NULL;
+ PTR_ERR(tzone));
return;
}
- ret = thermal_zone_device_enable(mvm->tz_device.tzone);
+ mutex_lock(&mvm->mutex);
+
+ mvm->tz_device.tzone = tzone;
+
+ if (!iwl_mvm_firmware_running(mvm) ||
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) {
+ mutex_unlock(&mvm->mutex);
+
+ pr_info("%s: Thermal zone not ready\n", __func__);
+
+ IWL_DEBUG_TEMP(mvm, "Thermal zone not ready\n");
+ return;
+ }
+
+ mutex_unlock(&mvm->mutex);
+
+ ret = thermal_zone_device_enable(tzone);
if (ret) {
IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone\n");
- thermal_zone_device_unregister(mvm->tz_device.tzone);
+
+ mutex_lock(&mvm->mutex);
+
+ mvm->tz_device.tzone = NULL;
+
+ mutex_unlock(&mvm->mutex);
+
+ thermal_zone_device_unregister(tzone);
}
}
@@ -787,14 +811,26 @@ static void iwl_mvm_cooling_device_regis
static void iwl_mvm_thermal_zone_unregister(struct iwl_mvm *mvm)
{
+ struct thermal_zone_device *tzone;
+
if (!iwl_mvm_is_tt_in_fw(mvm) || !mvm->tz_device.tzone)
return;
IWL_DEBUG_TEMP(mvm, "Thermal zone device unregister\n");
- if (mvm->tz_device.tzone) {
- thermal_zone_device_unregister(mvm->tz_device.tzone);
- mvm->tz_device.tzone = NULL;
+
+ mutex_lock(&mvm->mutex);
+
+ tzone = mvm->tz_device.tzone;
+ if (!tzone) {
+ mutex_unlock(&mvm->mutex);
+
+ return;
}
+ mvm->tz_device.tzone = NULL;
+
+ mutex_unlock(&mvm->mutex);
+
+ thermal_zone_device_unregister(tzone);
}
static void iwl_mvm_cooling_device_unregister(struct iwl_mvm *mvm)
@@ -847,3 +883,10 @@ void iwl_mvm_thermal_exit(struct iwl_mvm
#endif
mvm->init_status &= ~IWL_MVM_INIT_STATUS_THERMAL_INIT_COMPLETE;
}
+
+void iwl_mvm_thermal_tzone_enable(struct iwl_mvm *mvm)
+{
+#ifdef CONFIG_THERMAL
+ thermal_zone_schedule_device_enable(mvm->tz_device.tzone);
+#endif
+}
Index: linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
===================================================================
--- linux-pm.orig/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -458,6 +458,8 @@ static int iwl_mvm_load_ucode_wait_alive
#ifdef CONFIG_IWLWIFI_DEBUGFS
iwl_fw_set_dbg_rec_on(&mvm->fwrt);
#endif
+ if (mvm->fwrt.cur_fw_img == IWL_UCODE_REGULAR)
+ iwl_mvm_thermal_tzone_enable(mvm);
/*
* All the BSSes in the BSS table include the GP2 in the system
Index: linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
===================================================================
--- linux-pm.orig/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ linux-pm/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -2390,6 +2390,7 @@ void iwl_mvm_temp_notif(struct iwl_mvm *
void iwl_mvm_tt_handler(struct iwl_mvm *mvm);
void iwl_mvm_thermal_initialize(struct iwl_mvm *mvm, u32 min_backoff);
void iwl_mvm_thermal_exit(struct iwl_mvm *mvm);
+void iwl_mvm_thermal_tzone_enable(struct iwl_mvm *mvm);
void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state);
int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp);
void iwl_mvm_ct_kill_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb);
Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -606,6 +606,54 @@ int thermal_zone_device_enable(struct th
}
EXPORT_SYMBOL_GPL(thermal_zone_device_enable);
+struct tz_enable_work {
+ struct work_struct work;
+ struct thermal_zone_device *tz;
+};
+
+static void thermal_zone_enable_work_fn(struct work_struct *work)
+{
+ struct tz_enable_work *ew = container_of(work, struct tz_enable_work, work);
+ struct thermal_zone_device *tz = ew->tz;
+
+ kfree(ew);
+
+ dev_info(&tz->device, "Enabling thermal zone\n");
+
+ thermal_zone_device_enable(tz);
+
+ put_device(&tz->device);
+}
+
+/**
+ * thermal_zone_schedule_device_enable - Enable thermal zone asynchronously
+ * @tz: Thermal zone to enable.
+ *
+ * Enable a thermal zone in a worker thread.
+ *
+ * The caller must ensure that @tz will not change while this function
+ * is running.
+ */
+int thermal_zone_schedule_device_enable(struct thermal_zone_device *tz)
+{
+ struct tz_enable_work *ew;
+
+ if (!tz)
+ return 0;
+
+ ew = kzalloc(sizeof(*ew), GFP_KERNEL);
+ if (!ew)
+ return -ENOMEM;
+
+ INIT_WORK(&ew->work, thermal_zone_enable_work_fn);
+ get_device(&tz->device);
+ ew->tz = tz;
+ schedule_work(&ew->work);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_schedule_device_enable);
+
int thermal_zone_device_disable(struct thermal_zone_device *tz)
{
return thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED);
Index: linux-pm/include/linux/thermal.h
===================================================================
--- linux-pm.orig/include/linux/thermal.h
+++ linux-pm/include/linux/thermal.h
@@ -275,6 +275,7 @@ bool thermal_trip_is_bound_to_cdev(struc
struct thermal_cooling_device *cdev);
int thermal_zone_device_enable(struct thermal_zone_device *tz);
+int thermal_zone_schedule_device_enable(struct thermal_zone_device *tz);
int thermal_zone_device_disable(struct thermal_zone_device *tz);
void thermal_zone_device_critical(struct thermal_zone_device *tz);
#else