[PATCH v1 08/17] thermal: core: Introduce .should_bind() thermal zone callback

From: Rafael J. Wysocki
Date: Tue Jul 30 2024 - 14:23:43 EST


From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>

The current design of the code binding cooling devices to trip points in
thermal zones is convoluted and hard to follow.

Namely, a driver that registers a thermal zone can provide .bind()
and .unbind() operations for it, which are required to call either
thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(),
respectively, or thermal_zone_bind_cooling_device() and
thermal_zone_unbind_cooling_device(), respectively, for every relevant
trip point and the given cooling device. Moreover, if .bind() is
provided and .unbind() is not, the cleanup necessary during the removal
of a thermal zone or a cooling device may not be carried out.

In other words, the core relies on the thermal zone owners to do the
right thing, which is error prone and far from obvious, even though all
of that is not really necessary. Specifically, if the core could ask
the thermal zone owner, through a special thermal zone callback, whether
or not a given cooling device should be bound to a given trip point in
the given thermal zone, it might as well carry out all of the binding
and unbinding by itself. In particular, the unbinding can be done
automatically without involving the thermal zone owner at all because
all of the thermal instances associated with a thermal zone or cooling
device going away must be deleted regardless.

Accordingly, introduce a new thermal zone operation, .should_bind(),
that can be invoked by the thermal core for a given thermal zone,
trip point and cooling device combination in order to check whether
or not the cooling device should be bound to the trip point at hand.
It takes an additional cooling_spec argument allowing the thermal
zone owner to specify the highest and lowest cooling states of the
cooling device and its weight for the given trip point binding.

Make the thermal core use this operation, if present, in the absence of
.bind() and .unbind(). Note that .should_bind() will be called under
the thermal zone lock.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
---
drivers/thermal/thermal_core.c | 106 +++++++++++++++++++++++++++++++----------
include/linux/thermal.h | 10 +++
2 files changed, 92 insertions(+), 24 deletions(-)

Index: linux-pm/include/linux/thermal.h
===================================================================
--- linux-pm.orig/include/linux/thermal.h
+++ linux-pm/include/linux/thermal.h
@@ -84,11 +84,21 @@ struct thermal_trip {

struct thermal_zone_device;

+struct cooling_spec {
+ unsigned long upper; /* Highest cooling state */
+ unsigned long lower; /* Lowest cooling state */
+ unsigned int weight; /* Cooling device weight */
+};
+
struct thermal_zone_device_ops {
int (*bind) (struct thermal_zone_device *,
struct thermal_cooling_device *);
int (*unbind) (struct thermal_zone_device *,
struct thermal_cooling_device *);
+ bool (*should_bind) (struct thermal_zone_device *,
+ const struct thermal_trip *,
+ struct thermal_cooling_device *,
+ struct cooling_spec *);
int (*get_temp) (struct thermal_zone_device *, int *);
int (*set_trips) (struct thermal_zone_device *, int, int);
int (*change_mode) (struct thermal_zone_device *,
Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -1000,12 +1000,61 @@ static struct class *thermal_class;

static inline
void print_bind_err_msg(struct thermal_zone_device *tz,
+ const struct thermal_trip *trip,
struct thermal_cooling_device *cdev, int ret)
{
+ if (trip) {
+ dev_err(&tz->device, "binding cdev %s to trip %d failed: %d\n",
+ cdev->type, thermal_zone_trip_id(tz, trip), ret);
+ return;
+ }
+
dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n",
tz->type, cdev->type, ret);
}

+static void thermal_zone_cdev_binding(struct thermal_zone_device *tz,
+ struct thermal_cooling_device *cdev)
+{
+ struct thermal_trip_desc *td;
+ int ret;
+
+ /*
+ * Old-style binding. The .bind() callback is expected to call
+ * thermal_bind_cdev_to_trip() under the thermal zone lock.
+ */
+ if (tz->ops.bind) {
+ ret = tz->ops.bind(tz, cdev);
+ if (ret)
+ print_bind_err_msg(tz, NULL, cdev, ret);
+
+ return;
+ }
+
+ if (!tz->ops.should_bind)
+ return;
+
+ mutex_lock(&tz->lock);
+
+ for_each_trip_desc(tz, td) {
+ struct thermal_trip *trip = &td->trip;
+ struct cooling_spec c = {
+ .upper = THERMAL_NO_LIMIT,
+ .lower = THERMAL_NO_LIMIT,
+ .weight = THERMAL_WEIGHT_DEFAULT
+ };
+
+ if (tz->ops.should_bind(tz, trip, cdev, &c)) {
+ ret = thermal_bind_cdev_to_trip(tz, trip, cdev, c.upper,
+ c.lower, c.weight);
+ if (ret)
+ print_bind_err_msg(tz, trip, cdev, ret);
+ }
+ }
+
+ mutex_unlock(&tz->lock);
+}
+
/**
* __thermal_cooling_device_register() - register a new thermal cooling device
* @np: a pointer to a device tree node.
@@ -1101,13 +1150,8 @@ __thermal_cooling_device_register(struct
list_add(&cdev->node, &thermal_cdev_list);

/* Update binding information for 'this' new cdev */
- list_for_each_entry(pos, &thermal_tz_list, node) {
- if (pos->ops.bind) {
- ret = pos->ops.bind(pos, cdev);
- if (ret)
- print_bind_err_msg(pos, cdev, ret);
- }
- }
+ list_for_each_entry(pos, &thermal_tz_list, node)
+ thermal_zone_cdev_binding(pos, cdev);

list_for_each_entry(pos, &thermal_tz_list, node)
if (atomic_cmpxchg(&pos->need_update, 1, 0))
@@ -1308,6 +1352,28 @@ unlock_list:
}
EXPORT_SYMBOL_GPL(thermal_cooling_device_update);

+static void thermal_zone_cdev_unbinding(struct thermal_zone_device *tz,
+ struct thermal_cooling_device *cdev)
+{
+ struct thermal_trip_desc *td;
+
+ /*
+ * Old-style unbinding. The .unbind callback is expected to call
+ * thermal_unbind_cdev_from_trip() under the thermal zone lock.
+ */
+ if (tz->ops.unbind) {
+ tz->ops.unbind(tz, cdev);
+ return;
+ }
+
+ mutex_lock(&tz->lock);
+
+ for_each_trip_desc(tz, td)
+ thermal_unbind_cdev_from_trip(tz, &td->trip, cdev);
+
+ mutex_unlock(&tz->lock);
+}
+
/**
* thermal_cooling_device_unregister - removes a thermal cooling device
* @cdev: the thermal cooling device to remove.
@@ -1334,10 +1400,8 @@ void thermal_cooling_device_unregister(s
list_del(&cdev->node);

/* Unbind all thermal zones associated with 'this' cdev */
- list_for_each_entry(tz, &thermal_tz_list, node) {
- if (tz->ops.unbind)
- tz->ops.unbind(tz, cdev);
- }
+ list_for_each_entry(tz, &thermal_tz_list, node)
+ thermal_zone_cdev_unbinding(tz, cdev);

mutex_unlock(&thermal_list_lock);

@@ -1412,6 +1476,7 @@ thermal_zone_device_register_with_trips(
unsigned int polling_delay)
{
const struct thermal_trip *trip = trips;
+ struct thermal_cooling_device *cdev;
struct thermal_zone_device *tz;
struct thermal_trip_desc *td;
int id;
@@ -1434,8 +1499,9 @@ thermal_zone_device_register_with_trips(
return ERR_PTR(-EINVAL);
}

- if (!ops || !ops->get_temp) {
- pr_err("Thermal zone device ops not defined\n");
+ if (!ops || !ops->get_temp || (ops->should_bind && ops->bind) ||
+ (ops->should_bind && ops->unbind)) {
+ pr_err("Thermal zone device ops not defined or invalid\n");
return ERR_PTR(-EINVAL);
}

@@ -1548,15 +1614,8 @@ thermal_zone_device_register_with_trips(
mutex_unlock(&tz->lock);

/* Bind cooling devices for this zone */
- if (tz->ops.bind) {
- struct thermal_cooling_device *cdev;
-
- list_for_each_entry(cdev, &thermal_cdev_list, node) {
- result = tz->ops.bind(tz, cdev);
- if (result)
- print_bind_err_msg(tz, cdev, result);
- }
- }
+ list_for_each_entry(cdev, &thermal_cdev_list, node)
+ thermal_zone_cdev_binding(tz, cdev);

mutex_unlock(&thermal_list_lock);

@@ -1650,8 +1709,7 @@ void thermal_zone_device_unregister(stru

/* Unbind all cdevs associated with 'this' thermal zone */
list_for_each_entry(cdev, &thermal_cdev_list, node)
- if (tz->ops.unbind)
- tz->ops.unbind(tz, cdev);
+ thermal_zone_cdev_unbinding(tz, cdev);

mutex_unlock(&thermal_list_lock);