Re: [PATCH v2] hwmon (occ): Delay hwmon registration until user request

From: Guenter Roeck
Date: Wed Apr 27 2022 - 10:35:03 EST


On Wed, Apr 27, 2022 at 09:04:43AM -0500, Eddie James wrote:
> Instead of registering the hwmon device at probe time, use the
> existing "occ_active" sysfs file to control when the driver polls
> the OCC for sensor data and registers with hwmon. The reason for
> this change is that the SBE, which is the device by which the
> driver communicates with the OCC, cannot handle communications
> during certain system state transitions, resulting in
> unrecoverable system errors.
>
> Signed-off-by: Eddie James <eajames@xxxxxxxxxxxxx>

Applied to hwmon-next.

Guenter

> ---
> Changes since v1:
> - Update commit message to for clarity.
>
> drivers/hwmon/occ/common.c | 100 +++++++++++++++++++--------
> drivers/hwmon/occ/common.h | 5 +-
> drivers/hwmon/occ/p8_i2c.c | 2 +-
> drivers/hwmon/occ/p9_sbe.c | 2 +-
> drivers/hwmon/occ/sysfs.c | 137 ++++++++++++++++++++++---------------
> 5 files changed, 156 insertions(+), 90 deletions(-)
>
> diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
> index f00cd59f1d19..d78f4bebc718 100644
> --- a/drivers/hwmon/occ/common.c
> +++ b/drivers/hwmon/occ/common.c
> @@ -1149,44 +1149,75 @@ static void occ_parse_poll_response(struct occ *occ)
> sizeof(*header), size + sizeof(*header));
> }
>
> -int occ_setup(struct occ *occ, const char *name)
> +int occ_active(struct occ *occ, bool active)
> {
> - int rc;
> -
> - mutex_init(&occ->lock);
> - occ->groups[0] = &occ->group;
> + int rc = mutex_lock_interruptible(&occ->lock);
>
> - /* no need to lock */
> - rc = occ_poll(occ);
> - if (rc == -ESHUTDOWN) {
> - dev_info(occ->bus_dev, "host is not ready\n");
> - return rc;
> - } else if (rc < 0) {
> - dev_err(occ->bus_dev,
> - "failed to get OCC poll response=%02x: %d\n",
> - occ->resp.return_status, rc);
> + if (rc)
> return rc;
> - }
>
> - occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
> - occ_parse_poll_response(occ);
> + if (active) {
> + if (occ->active) {
> + rc = -EALREADY;
> + goto unlock;
> + }
>
> - rc = occ_setup_sensor_attrs(occ);
> - if (rc) {
> - dev_err(occ->bus_dev, "failed to setup sensor attrs: %d\n",
> - rc);
> - return rc;
> - }
> + occ->error_count = 0;
> + occ->last_safe = 0;
>
> - occ->hwmon = devm_hwmon_device_register_with_groups(occ->bus_dev, name,
> - occ, occ->groups);
> - if (IS_ERR(occ->hwmon)) {
> - rc = PTR_ERR(occ->hwmon);
> - dev_err(occ->bus_dev, "failed to register hwmon device: %d\n",
> - rc);
> - return rc;
> + rc = occ_poll(occ);
> + if (rc < 0) {
> + dev_err(occ->bus_dev,
> + "failed to get OCC poll response=%02x: %d\n",
> + occ->resp.return_status, rc);
> + goto unlock;
> + }
> +
> + occ->active = true;
> + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
> + occ_parse_poll_response(occ);
> +
> + rc = occ_setup_sensor_attrs(occ);
> + if (rc) {
> + dev_err(occ->bus_dev,
> + "failed to setup sensor attrs: %d\n", rc);
> + goto unlock;
> + }
> +
> + occ->hwmon = hwmon_device_register_with_groups(occ->bus_dev,
> + "occ", occ,
> + occ->groups);
> + if (IS_ERR(occ->hwmon)) {
> + rc = PTR_ERR(occ->hwmon);
> + occ->hwmon = NULL;
> + dev_err(occ->bus_dev,
> + "failed to register hwmon device: %d\n", rc);
> + goto unlock;
> + }
> + } else {
> + if (!occ->active) {
> + rc = -EALREADY;
> + goto unlock;
> + }
> +
> + if (occ->hwmon)
> + hwmon_device_unregister(occ->hwmon);
> + occ->active = false;
> + occ->hwmon = NULL;
> }
>
> +unlock:
> + mutex_unlock(&occ->lock);
> + return rc;
> +}
> +
> +int occ_setup(struct occ *occ)
> +{
> + int rc;
> +
> + mutex_init(&occ->lock);
> + occ->groups[0] = &occ->group;
> +
> rc = occ_setup_sysfs(occ);
> if (rc)
> dev_err(occ->bus_dev, "failed to setup sysfs: %d\n", rc);
> @@ -1195,6 +1226,15 @@ int occ_setup(struct occ *occ, const char *name)
> }
> EXPORT_SYMBOL_GPL(occ_setup);
>
> +void occ_shutdown(struct occ *occ)
> +{
> + occ_shutdown_sysfs(occ);
> +
> + if (occ->hwmon)
> + hwmon_device_unregister(occ->hwmon);
> +}
> +EXPORT_SYMBOL_GPL(occ_shutdown);
> +
> MODULE_AUTHOR("Eddie James <eajames@xxxxxxxxxxxxx>");
> MODULE_DESCRIPTION("Common OCC hwmon code");
> MODULE_LICENSE("GPL");
> diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
> index 2dd4a4d240c0..64d5ec7e169b 100644
> --- a/drivers/hwmon/occ/common.h
> +++ b/drivers/hwmon/occ/common.h
> @@ -106,6 +106,7 @@ struct occ {
> struct attribute_group group;
> const struct attribute_group *groups[2];
>
> + bool active;
> int error; /* final transfer error after retry */
> int last_error; /* latest transfer error */
> unsigned int error_count; /* number of xfr errors observed */
> @@ -123,9 +124,11 @@ struct occ {
> u8 prev_mode;
> };
>
> -int occ_setup(struct occ *occ, const char *name);
> +int occ_active(struct occ *occ, bool active);
> +int occ_setup(struct occ *occ);
> int occ_setup_sysfs(struct occ *occ);
> void occ_shutdown(struct occ *occ);
> +void occ_shutdown_sysfs(struct occ *occ);
> void occ_sysfs_poll_done(struct occ *occ);
> int occ_update_response(struct occ *occ);
>
> diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c
> index 9e61e1fb5142..da39ea28df31 100644
> --- a/drivers/hwmon/occ/p8_i2c.c
> +++ b/drivers/hwmon/occ/p8_i2c.c
> @@ -223,7 +223,7 @@ static int p8_i2c_occ_probe(struct i2c_client *client)
> occ->poll_cmd_data = 0x10; /* P8 OCC poll data */
> occ->send_cmd = p8_i2c_occ_send_cmd;
>
> - return occ_setup(occ, "p8_occ");
> + return occ_setup(occ);
> }
>
> static int p8_i2c_occ_remove(struct i2c_client *client)
> diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c
> index 49b13cc01073..42fc7b97bb34 100644
> --- a/drivers/hwmon/occ/p9_sbe.c
> +++ b/drivers/hwmon/occ/p9_sbe.c
> @@ -145,7 +145,7 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
> occ->poll_cmd_data = 0x20; /* P9 OCC poll data */
> occ->send_cmd = p9_sbe_occ_send_cmd;
>
> - rc = occ_setup(occ, "p9_occ");
> + rc = occ_setup(occ);
> if (rc == -ESHUTDOWN)
> rc = -ENODEV; /* Host is shutdown, don't spew errors */
>
> diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c
> index b2f788a77746..2317301fc1e9 100644
> --- a/drivers/hwmon/occ/sysfs.c
> +++ b/drivers/hwmon/occ/sysfs.c
> @@ -6,13 +6,13 @@
> #include <linux/export.h>
> #include <linux/hwmon-sysfs.h>
> #include <linux/kernel.h>
> +#include <linux/kstrtox.h>
> #include <linux/sysfs.h>
>
> #include "common.h"
>
> /* OCC status register */
> #define OCC_STAT_MASTER BIT(7)
> -#define OCC_STAT_ACTIVE BIT(0)
>
> /* OCC extended status register */
> #define OCC_EXT_STAT_DVFS_OT BIT(7)
> @@ -22,6 +22,25 @@
> #define OCC_EXT_STAT_DVFS_VDD BIT(3)
> #define OCC_EXT_STAT_GPU_THROTTLE GENMASK(2, 0)
>
> +static ssize_t occ_active_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + int rc;
> + bool active;
> + struct occ *occ = dev_get_drvdata(dev);
> +
> + rc = kstrtobool(buf, &active);
> + if (rc)
> + return rc;
> +
> + rc = occ_active(occ, active);
> + if (rc)
> + return rc;
> +
> + return count;
> +}
> +
> static ssize_t occ_sysfs_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> @@ -31,54 +50,64 @@ static ssize_t occ_sysfs_show(struct device *dev,
> struct occ_poll_response_header *header;
> struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
>
> - rc = occ_update_response(occ);
> - if (rc)
> - return rc;
> + if (occ->active) {
> + rc = occ_update_response(occ);
> + if (rc)
> + return rc;
>
> - header = (struct occ_poll_response_header *)occ->resp.data;
> -
> - switch (sattr->index) {
> - case 0:
> - val = !!(header->status & OCC_STAT_MASTER);
> - break;
> - case 1:
> - val = !!(header->status & OCC_STAT_ACTIVE);
> - break;
> - case 2:
> - val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT);
> - break;
> - case 3:
> - val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER);
> - break;
> - case 4:
> - val = !!(header->ext_status & OCC_EXT_STAT_MEM_THROTTLE);
> - break;
> - case 5:
> - val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP);
> - break;
> - case 6:
> - val = header->occ_state;
> - break;
> - case 7:
> - if (header->status & OCC_STAT_MASTER)
> - val = hweight8(header->occs_present);
> - else
> + header = (struct occ_poll_response_header *)occ->resp.data;
> +
> + switch (sattr->index) {
> + case 0:
> + val = !!(header->status & OCC_STAT_MASTER);
> + break;
> + case 1:
> val = 1;
> - break;
> - case 8:
> - val = header->ips_status;
> - break;
> - case 9:
> - val = header->mode;
> - break;
> - case 10:
> - val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD);
> - break;
> - case 11:
> - val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE;
> - break;
> - default:
> - return -EINVAL;
> + break;
> + case 2:
> + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT);
> + break;
> + case 3:
> + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER);
> + break;
> + case 4:
> + val = !!(header->ext_status &
> + OCC_EXT_STAT_MEM_THROTTLE);
> + break;
> + case 5:
> + val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP);
> + break;
> + case 6:
> + val = header->occ_state;
> + break;
> + case 7:
> + if (header->status & OCC_STAT_MASTER)
> + val = hweight8(header->occs_present);
> + else
> + val = 1;
> + break;
> + case 8:
> + val = header->ips_status;
> + break;
> + case 9:
> + val = header->mode;
> + break;
> + case 10:
> + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD);
> + break;
> + case 11:
> + val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE;
> + break;
> + default:
> + return -EINVAL;
> + }
> + } else {
> + if (sattr->index == 1)
> + val = 0;
> + else if (sattr->index <= 11)
> + val = -ENODATA;
> + else
> + return -EINVAL;
> }
>
> return sysfs_emit(buf, "%d\n", val);
> @@ -95,7 +124,8 @@ static ssize_t occ_error_show(struct device *dev,
> }
>
> static SENSOR_DEVICE_ATTR(occ_master, 0444, occ_sysfs_show, NULL, 0);
> -static SENSOR_DEVICE_ATTR(occ_active, 0444, occ_sysfs_show, NULL, 1);
> +static SENSOR_DEVICE_ATTR(occ_active, 0644, occ_sysfs_show, occ_active_store,
> + 1);
> static SENSOR_DEVICE_ATTR(occ_dvfs_overtemp, 0444, occ_sysfs_show, NULL, 2);
> static SENSOR_DEVICE_ATTR(occ_dvfs_power, 0444, occ_sysfs_show, NULL, 3);
> static SENSOR_DEVICE_ATTR(occ_mem_throttle, 0444, occ_sysfs_show, NULL, 4);
> @@ -139,7 +169,7 @@ void occ_sysfs_poll_done(struct occ *occ)
> * On the first poll response, we haven't yet created the sysfs
> * attributes, so don't make any notify calls.
> */
> - if (!occ->hwmon)
> + if (!occ->active)
> goto done;
>
> if ((header->status & OCC_STAT_MASTER) !=
> @@ -148,12 +178,6 @@ void occ_sysfs_poll_done(struct occ *occ)
> sysfs_notify(&occ->bus_dev->kobj, NULL, name);
> }
>
> - if ((header->status & OCC_STAT_ACTIVE) !=
> - (occ->prev_stat & OCC_STAT_ACTIVE)) {
> - name = sensor_dev_attr_occ_active.dev_attr.attr.name;
> - sysfs_notify(&occ->bus_dev->kobj, NULL, name);
> - }
> -
> if ((header->ext_status & OCC_EXT_STAT_DVFS_OT) !=
> (occ->prev_ext_stat & OCC_EXT_STAT_DVFS_OT)) {
> name = sensor_dev_attr_occ_dvfs_overtemp.dev_attr.attr.name;
> @@ -227,8 +251,7 @@ int occ_setup_sysfs(struct occ *occ)
> return sysfs_create_group(&occ->bus_dev->kobj, &occ_sysfs);
> }
>
> -void occ_shutdown(struct occ *occ)
> +void occ_shutdown_sysfs(struct occ *occ)
> {
> sysfs_remove_group(&occ->bus_dev->kobj, &occ_sysfs);
> }
> -EXPORT_SYMBOL_GPL(occ_shutdown);