Re: [PATCH 1/3] hwmon: tmp421: handle I2C errors

From: Guenter Roeck
Date: Wed Sep 22 2021 - 11:53:30 EST


On Wed, Sep 22, 2021 at 04:41:52PM +0300, Paul Fertser wrote:
> Function i2c_smbus_read_byte_data() can return a negative error number
> instead of the data read if the I2C transaction fails for whatever reason.
>
> I consider this fix to be stable material, as the lack of error checking
> here leads to serious issues on production hardware. Errors treated as
> temperatures produce spurious critical temperature-crossed-threshold
> errors in BMC logs for OCP server hardware. The patch was tested with a
> Mellanox OCP Mezzanine card emulating the TMP421 protocol for temperature
> sensing, which sometimes leads to I2C protocol errors during the early
> boot-up stage.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Paul Fertser <fercerpav@xxxxxxxxx>
> ---
> drivers/hwmon/tmp421.c | 31 ++++++++++++++++++++++---------
> 1 file changed, 22 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
> index ede66ea6a730..6175ed4b10bd 100644
> --- a/drivers/hwmon/tmp421.c
> +++ b/drivers/hwmon/tmp421.c
> @@ -93,7 +93,7 @@ struct tmp421_data {
> struct hwmon_channel_info temp_info;
> const struct hwmon_channel_info *info[2];
> struct hwmon_chip_info chip;
> - char valid;
> + int last_errno;
> unsigned long last_updated;
> unsigned long channels;
> u8 config;
> @@ -128,20 +128,30 @@ static struct tmp421_data *tmp421_update_device(struct device *dev)
> mutex_lock(&data->update_lock);
>
> if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
> - !data->valid) {
> - data->config = i2c_smbus_read_byte_data(client,
> - TMP421_CONFIG_REG_1);
> + data->last_errno) {
> + data->last_errno = i2c_smbus_read_byte_data(client,
> + TMP421_CONFIG_REG_1);

No. The function should return an ERR_PTR after an error.
Something like:
int ret = 0;
...
ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1);
if (ret < 0)
goto exit;
data->config = ret;
...
exit:
mutex_unlock(...);
return ret < 0 ? ERR_PTR(ret) : data;

Or, even better, let tmp421_update_device() return an error code instead
of data, and let the caller get the data pointer.

int tmp421_update_device(struct tmp421_data *data)
{
struct i2c_client *client = data->client;
int ret = 0;

...
return ret < 0 ? ret : 0;
}
...

struct tmp421_data *data = dev_get_drvdata(dev);

ret = tmp421_update_device(data);
if (ret)
return ret;


Guenter

> + if (data->last_errno < 0)
> + goto exit;
> + data->config = data->last_errno;
>
> for (i = 0; i < data->channels; i++) {
> - data->temp[i] = i2c_smbus_read_byte_data(client,
> - TMP421_TEMP_MSB[i]) << 8;
> - data->temp[i] |= i2c_smbus_read_byte_data(client,
> - TMP421_TEMP_LSB[i]);
> + data->last_errno = i2c_smbus_read_byte_data(client,
> + TMP421_TEMP_MSB[i]);
> + if (data->last_errno < 0)
> + goto exit;
> + data->temp[i] = data->last_errno << 8;
> + data->last_errno = i2c_smbus_read_byte_data(client,
> + TMP421_TEMP_LSB[i]);
> + if (data->last_errno < 0)
> + goto exit;
> + data->temp[i] |= data->last_errno;
> }
> data->last_updated = jiffies;
> - data->valid = 1;
> + data->last_errno = 0;
> }
>
> +exit:
> mutex_unlock(&data->update_lock);
>
> return data;
> @@ -152,6 +162,9 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
> {
> struct tmp421_data *tmp421 = tmp421_update_device(dev);
>
> + if (tmp421->last_errno)
> + return tmp421->last_errno;
> +
> switch (attr) {
> case hwmon_temp_input:
> if (tmp421->config & TMP421_CONFIG_RANGE)
> --
> 2.17.1
>