Re: [PATCH v3 09/10] drivers/hwmon: Add PECI hwmon client drivers

From: Guenter Roeck
Date: Wed Apr 11 2018 - 23:41:15 EST


On 04/11/2018 07:51 PM, Jae Hyun Yoo wrote:
On 4/11/2018 5:34 PM, Guenter Roeck wrote:
On 04/11/2018 02:59 PM, Jae Hyun Yoo wrote:
Hi Guenter,

Thanks a lot for sharing your time. Please see my inline answers.

On 4/10/2018 3:28 PM, Guenter Roeck wrote:
On Tue, Apr 10, 2018 at 11:32:11AM -0700, Jae Hyun Yoo wrote:
This commit adds PECI cputemp and dimmtemp hwmon drivers.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@xxxxxxxxxxxxxxx>
Reviewed-by: Haiyue Wang <haiyue.wang@xxxxxxxxxxxxxxx>
Reviewed-by: James Feist <james.feist@xxxxxxxxxxxxxxx>
Reviewed-by: Vernon Mauery <vernon.mauery@xxxxxxxxxxxxxxx>
Cc: Alan Cox <alan@xxxxxxxxxxxxxxx>
Cc: Andrew Jeffery <andrew@xxxxxxxx>
Cc: Andrew Lunn <andrew@xxxxxxx>
Cc: Andy Shevchenko <andriy.shevchenko@xxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Fengguang Wu <fengguang.wu@xxxxxxxxx>
Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Guenter Roeck <linux@xxxxxxxxxxxx>
Cc: Jason M Biils <jason.m.bills@xxxxxxxxxxxxxxx>
Cc: Jean Delvare <jdelvare@xxxxxxxx>
Cc: Joel Stanley <joel@xxxxxxxxx>
Cc: Julia Cartwright <juliac@xxxxxxxxxxxx>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@xxxxxxxxx>
Cc: Milton Miller II <miltonm@xxxxxxxxxx>
Cc: Pavel Machek <pavel@xxxxxx>
Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx>
Cc: Stef van Os <stef.van.os@xxxxxxxxxxxxxxxxxxxxxxxxx>
Cc: Sumeet R Pawnikar <sumeet.r.pawnikar@xxxxxxxxx>
---
 drivers/hwmon/Kconfig | 28 ++
 drivers/hwmon/Makefile | 2 +
 drivers/hwmon/peci-cputemp.c | 783 ++++++++++++++++++++++++++++++++++++++++++
 drivers/hwmon/peci-dimmtemp.c | 432 +++++++++++++++++++++++
 4 files changed, 1245 insertions(+)
 create mode 100644 drivers/hwmon/peci-cputemp.c
 create mode 100644 drivers/hwmon/peci-dimmtemp.c

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index f249a4428458..c52f610f81d0 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1259,6 +1259,34 @@ config SENSORS_NCT7904
 This driver can also be built as a module. If so, the module
ÂÂÂÂÂÂÂ will be called nct7904.
+config SENSORS_PECI_CPUTEMP
+ÂÂÂ tristate "PECI CPU temperature monitoring support"
+ÂÂÂ depends on OF
+ÂÂÂ depends on PECI
+ÂÂÂ help
+ÂÂÂÂÂ If you say yes here you get support for the generic Intel PECI
+ÂÂÂÂÂ cputemp driver which provides Digital Thermal Sensor (DTS) thermal
+ÂÂÂÂÂ readings of the CPU package and CPU cores that are accessible using
+ÂÂÂÂÂ the PECI Client Command Suite via the processor PECI client.
+ÂÂÂÂÂ Check Documentation/hwmon/peci-cputemp for details.
+
+ This driver can also be built as a module. If so, the module
+ÂÂÂÂÂ will be called peci-cputemp.
+
+config SENSORS_PECI_DIMMTEMP
+ÂÂÂ tristate "PECI DIMM temperature monitoring support"
+ÂÂÂ depends on OF
+ÂÂÂ depends on PECI
+ÂÂÂ help
+ÂÂÂÂÂ If you say yes here you get support for the generic Intel PECI hwmon
+ÂÂÂÂÂ driver which provides Digital Thermal Sensor (DTS) thermal readings of
+ÂÂÂÂÂ DIMM components that are accessible using the PECI Client Command
+ÂÂÂÂÂ Suite via the processor PECI client.
+ÂÂÂÂÂ Check Documentation/hwmon/peci-dimmtemp for details.
+
+ This driver can also be built as a module. If so, the module
+ÂÂÂÂÂ will be called peci-dimmtemp.
+
 config SENSORS_NSA320
ÂÂÂÂÂ tristate "ZyXEL NSA320 and compatible fan speed and temperature sensors"
ÂÂÂÂÂ depends on GPIOLIB && OF
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index e7d52a36e6c4..48d9598fcd3a 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -136,6 +136,8 @@ obj-$(CONFIG_SENSORS_NCT7802)ÂÂÂ += nct7802.o
 obj-$(CONFIG_SENSORS_NCT7904) += nct7904.o
 obj-$(CONFIG_SENSORS_NSA320) += nsa320-hwmon.o
 obj-$(CONFIG_SENSORS_NTC_THERMISTOR) += ntc_thermistor.o
+obj-$(CONFIG_SENSORS_PECI_CPUTEMP)ÂÂÂ += peci-cputemp.o
+obj-$(CONFIG_SENSORS_PECI_DIMMTEMP)ÂÂÂ += peci-dimmtemp.o
 obj-$(CONFIG_SENSORS_PC87360) += pc87360.o
 obj-$(CONFIG_SENSORS_PC87427) += pc87427.o
 obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o
diff --git a/drivers/hwmon/peci-cputemp.c b/drivers/hwmon/peci-cputemp.c
new file mode 100644
index 000000000000..f0bc92687512
--- /dev/null
+++ b/drivers/hwmon/peci-cputemp.c
@@ -0,0 +1,783 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Intel Corporation
+
+#include <linux/delay.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>

Is this include needed ?


No it isn't. Will drop the line.

+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/peci.h>
+
+#define TEMP_TYPE_PECIÂÂÂÂÂÂÂ 6Â /* Sensor type 6: Intel PECI */
+
+#define CORE_MAX_ON_HSXÂÂÂÂÂÂ 18 /* Max number of cores on Haswell */
+#define CORE_MAX_ON_BDXÂÂÂÂÂÂ 24 /* Max number of cores on Broadwell */
+#define CORE_MAX_ON_SKXÂÂÂÂÂÂ 28 /* Max number of cores on Skylake */
+
+#define DEFAULT_CHANNEL_NUMSÂ 5
+#define CORETEMP_CHANNEL_NUMS CORE_MAX_ON_SKX
+#define CPUTEMP_CHANNEL_NUMSÂ (DEFAULT_CHANNEL_NUMS + CORETEMP_CHANNEL_NUMS)
+
+#define CLIENT_CPU_ID_MASKÂÂÂ 0xf0ff0Â /* Mask for Family / Model info */
+
+#define UPDATE_INTERVAL_MINÂÂ HZ
+
+enum cpu_gens {
+ÂÂÂ CPU_GEN_HSX, /* Haswell Xeon */
+ÂÂÂ CPU_GEN_BRX, /* Broadwell Xeon */
+ÂÂÂ CPU_GEN_SKX, /* Skylake Xeon */
+ÂÂÂ CPU_GEN_MAX
+};
+
+struct cpu_gen_info {
+ÂÂÂ u32 type;
+ÂÂÂ u32 cpu_id;
+ÂÂÂ u32 core_max;
+};
+
+struct temp_data {
+ÂÂÂ bool valid;
+ÂÂÂ s32Â value;
+ÂÂÂ unsigned long last_updated;
+};
+
+struct temp_group {
+ÂÂÂ struct temp_data die;
+ÂÂÂ struct temp_data dts_margin;
+ÂÂÂ struct temp_data tcontrol;
+ÂÂÂ struct temp_data tthrottle;
+ÂÂÂ struct temp_data tjmax;
+ÂÂÂ struct temp_data core[CORETEMP_CHANNEL_NUMS];
+};
+
+struct peci_cputemp {
+ÂÂÂ struct peci_client *client;
+ÂÂÂ struct device *dev;
+ÂÂÂ char name[PECI_NAME_SIZE];
+ÂÂÂ struct temp_group temp;
+ÂÂÂ u8 addr;
+ÂÂÂ uint cpu_no;
+ÂÂÂ const struct cpu_gen_info *gen_info;
+ÂÂÂ u32 core_mask;
+ÂÂÂ u32 temp_config[CPUTEMP_CHANNEL_NUMS + 1];
+ÂÂÂ uint config_idx;
+ÂÂÂ struct hwmon_channel_info temp_info;
+ÂÂÂ const struct hwmon_channel_info *info[2];
+ÂÂÂ struct hwmon_chip_info chip;
+};
+
+enum cputemp_channels {
+ÂÂÂ channel_die,
+ÂÂÂ channel_dts_mrgn,
+ÂÂÂ channel_tcontrol,
+ÂÂÂ channel_tthrottle,
+ÂÂÂ channel_tjmax,
+ÂÂÂ channel_core,
+};
+
+static const struct cpu_gen_info cpu_gen_info_table[] = {
+ÂÂÂ { .type = CPU_GEN_HSX,
+ÂÂÂÂÂ .cpu_id = 0x306f0, /* Family code: 6, Model number: 63 (0x3f) */
+ÂÂÂÂÂ .core_max = CORE_MAX_ON_HSX },
+ÂÂÂ { .type = CPU_GEN_BRX,
+ÂÂÂÂÂ .cpu_id = 0x406f0, /* Family code: 6, Model number: 79 (0x4f) */
+ÂÂÂÂÂ .core_max = CORE_MAX_ON_BDX },
+ÂÂÂ { .type = CPU_GEN_SKX,
+ÂÂÂÂÂ .cpu_id = 0x50650, /* Family code: 6, Model number: 85 (0x55) */
+ÂÂÂÂÂ .core_max = CORE_MAX_ON_SKX },
+};
+
+static const u32 config_table[DEFAULT_CHANNEL_NUMS + 1] = {
+ÂÂÂ /* Die temperature */
+ÂÂÂ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+ÂÂÂ HWMON_T_CRIT_HYST,
+
+ÂÂÂ /* DTS margin temperature */
+ÂÂÂ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_LCRIT,
+
+ÂÂÂ /* Tcontrol temperature */
+ÂÂÂ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_CRIT,
+
+ÂÂÂ /* Tthrottle temperature */
+ÂÂÂ HWMON_T_LABEL | HWMON_T_INPUT,
+
+ÂÂÂ /* Tjmax temperature */
+ÂÂÂ HWMON_T_LABEL | HWMON_T_INPUT,
+
+ÂÂÂ /* Core temperature - for all core channels */
+ÂÂÂ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+ÂÂÂ HWMON_T_CRIT_HYST,
+};
+
+static const char *cputemp_label[CPUTEMP_CHANNEL_NUMS] = {
+ÂÂÂ "Die",
+ÂÂÂ "DTS margin",
+ÂÂÂ "Tcontrol",
+ÂÂÂ "Tthrottle",
+ÂÂÂ "Tjmax",
+ÂÂÂ "Core 0", "Core 1", "Core 2", "Core 3",
+ÂÂÂ "Core 4", "Core 5", "Core 6", "Core 7",
+ÂÂÂ "Core 8", "Core 9", "Core 10", "Core 11",
+ÂÂÂ "Core 12", "Core 13", "Core 14", "Core 15",
+ÂÂÂ "Core 16", "Core 17", "Core 18", "Core 19",
+ÂÂÂ "Core 20", "Core 21", "Core 22", "Core 23",
+};
+
+static int send_peci_cmd(struct peci_cputemp *priv,
+ÂÂÂÂÂÂÂÂÂÂÂÂ enum peci_cmd cmd,
+ÂÂÂÂÂÂÂÂÂÂÂÂ void *msg)
+{
+ÂÂÂ return peci_command(priv->client->adapter, cmd, msg);
+}
+
+static int need_update(struct temp_data *temp)

Please use bool.


Okay. I'll use bool instead of int.

+{
+ÂÂÂ if (temp->valid &&
+ÂÂÂÂÂÂÂ time_before(jiffies, temp->last_updated + UPDATE_INTERVAL_MIN))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ return 1;
+}
+
+static void mark_updated(struct temp_data *temp)
+{
+ÂÂÂ temp->valid = true;
+ÂÂÂ temp->last_updated = jiffies;
+}
+
+static s32 ten_dot_six_to_millidegree(s32 val)
+{
+ÂÂÂ return ((val ^ 0x8000) - 0x8000) * 1000 / 64;
+}
+
+static int get_tjmax(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!priv->temp.tjmax.valid) {
+ÂÂÂÂÂÂÂ msg.addr = priv->addr;
+ÂÂÂÂÂÂÂ msg.index = MBX_INDEX_TEMP_TARGET;
+ÂÂÂÂÂÂÂ msg.param = 0;
+ÂÂÂÂÂÂÂ msg.rx_len = 4;
+
+ÂÂÂÂÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ priv->temp.tjmax.value = (s32)msg.pkg_config[2] * 1000;
+ÂÂÂÂÂÂÂ priv->temp.tjmax.valid = true;
+ÂÂÂ }
+
+ÂÂÂ return 0;
+}
+
+static int get_tcontrol(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ s32 tcontrol_margin;
+ÂÂÂ s32 tthrottle_offset;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!need_update(&priv->temp.tcontrol))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ rc = get_tjmax(priv);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_TEMP_TARGET;
+ÂÂÂ msg.param = 0;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ tcontrol_margin = msg.pkg_config[1];
+ÂÂÂ tcontrol_margin = ((tcontrol_margin ^ 0x80) - 0x80) * 1000;
+ÂÂÂ priv->temp.tcontrol.value = priv->temp.tjmax.value - tcontrol_margin;
+
+ÂÂÂ tthrottle_offset = (msg.pkg_config[3] & 0x2f) * 1000;
+ÂÂÂ priv->temp.tthrottle.value = priv->temp.tjmax.value - tthrottle_offset;
+
+ÂÂÂ mark_updated(&priv->temp.tcontrol);
+ÂÂÂ mark_updated(&priv->temp.tthrottle);
+
+ÂÂÂ return 0;
+}
+
+static int get_tthrottle(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ s32 tcontrol_margin;
+ÂÂÂ s32 tthrottle_offset;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!need_update(&priv->temp.tthrottle))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ rc = get_tjmax(priv);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_TEMP_TARGET;
+ÂÂÂ msg.param = 0;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ tthrottle_offset = (msg.pkg_config[3] & 0x2f) * 1000;
+ÂÂÂ priv->temp.tthrottle.value = priv->temp.tjmax.value - tthrottle_offset;
+
+ÂÂÂ tcontrol_margin = msg.pkg_config[1];
+ÂÂÂ tcontrol_margin = ((tcontrol_margin ^ 0x80) - 0x80) * 1000;
+ÂÂÂ priv->temp.tcontrol.value = priv->temp.tjmax.value - tcontrol_margin;
+
+ÂÂÂ mark_updated(&priv->temp.tthrottle);
+ÂÂÂ mark_updated(&priv->temp.tcontrol);
+
+ÂÂÂ return 0;
+}

I am quite completely missing how the two functions above are different.


The two above functions are slightly different but uses the same PECI command which provides both Tthrottle and Tcontrol values in pkg_config array so it updates the values to reduce duplicate PECI transactions. Probably, combining these two functions into get_ttrottle_and_tcontrol() would look better. I'll rewrite it.

+
+static int get_die_temp(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_get_temp_msg msg;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!need_update(&priv->temp.die))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ rc = get_tjmax(priv);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ msg.addr = priv->addr;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_GET_TEMP, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ priv->temp.die.value = priv->temp.tjmax.value +
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ((s32)msg.temp_raw * 1000 / 64);
+
+ÂÂÂ mark_updated(&priv->temp.die);
+
+ÂÂÂ return 0;
+}
+
+static int get_dts_margin(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ s32 dts_margin;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!need_update(&priv->temp.dts_margin))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_DTS_MARGIN;
+ÂÂÂ msg.param = 0;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ dts_margin = (msg.pkg_config[1] << 8) | msg.pkg_config[0];
+
+ÂÂÂ /**
+ÂÂÂÂ * Processors return a value of DTS reading in 10.6 format
+ÂÂÂÂ * (10 bits signed decimal, 6 bits fractional).
+ÂÂÂÂ * Error codes:
+ÂÂÂÂ *ÂÂ 0x8000: General sensor error
+ÂÂÂÂ *ÂÂ 0x8001: Reserved
+ÂÂÂÂ *ÂÂ 0x8002: Underflow on reading value
+ÂÂÂÂ *ÂÂ 0x8003-0x81ff: Reserved
+ÂÂÂÂ */
+ÂÂÂ if (dts_margin >= 0x8000 && dts_margin <= 0x81ff)
+ÂÂÂÂÂÂÂ return -EIO;
+
+ÂÂÂ dts_margin = ten_dot_six_to_millidegree(dts_margin);
+
+ÂÂÂ priv->temp.dts_margin.value = dts_margin;
+
+ÂÂÂ mark_updated(&priv->temp.dts_margin);
+
+ÂÂÂ return 0;
+}
+
+static int get_core_temp(struct peci_cputemp *priv, int core_index)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ s32 core_dts_margin;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!need_update(&priv->temp.core[core_index]))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ rc = get_tjmax(priv);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_PER_CORE_DTS_TEMP;
+ÂÂÂ msg.param = core_index;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ core_dts_margin = (msg.pkg_config[1] << 8) | msg.pkg_config[0];
+
+ÂÂÂ /**
+ÂÂÂÂ * Processors return a value of the core DTS reading in 10.6 format
+ÂÂÂÂ * (10 bits signed decimal, 6 bits fractional).
+ÂÂÂÂ * Error codes:
+ÂÂÂÂ *ÂÂ 0x8000: General sensor error
+ÂÂÂÂ *ÂÂ 0x8001: Reserved
+ÂÂÂÂ *ÂÂ 0x8002: Underflow on reading value
+ÂÂÂÂ *ÂÂ 0x8003-0x81ff: Reserved
+ÂÂÂÂ */
+ÂÂÂ if (core_dts_margin >= 0x8000 && core_dts_margin <= 0x81ff)
+ÂÂÂÂÂÂÂ return -EIO;
+
+ÂÂÂ core_dts_margin = ten_dot_six_to_millidegree(core_dts_margin);
+
+ÂÂÂ priv->temp.core[core_index].value = priv->temp.tjmax.value +
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ core_dts_margin;
+
+ÂÂÂ mark_updated(&priv->temp.core[core_index]);
+
+ÂÂÂ return 0;
+}
+

There is a lot of duplication in those functions. Would it be possible
to find common code and use functions for it instead of duplicating
everything several times ?


Are you pointing out this code?
/**
ÂÂ* Processors return a value of the core DTS reading in 10.6 format
ÂÂ* (10 bits signed decimal, 6 bits fractional).
ÂÂ* Error codes:
ÂÂ*ÂÂ 0x8000: General sensor error
ÂÂ*ÂÂ 0x8001: Reserved
ÂÂ*ÂÂ 0x8002: Underflow on reading value
ÂÂ*ÂÂ 0x8003-0x81ff: Reserved
ÂÂ*/
if (core_dts_margin >= 0x8000 && core_dts_margin <= 0x81ff)
ÂÂÂÂÂreturn -EIO;

Then I'll rewrite it as a function. If not, please point out the duplication.


There is lots of other duplication.


Sorry but can you point out the duplication?

write a python script to do a semantic comparison.

+static int find_core_index(struct peci_cputemp *priv, int channel)
+{
+ÂÂÂ int core_channel = channel - DEFAULT_CHANNEL_NUMS;
+ÂÂÂ int idx, found = 0;
+
+ÂÂÂ for (idx = 0; idx < priv->gen_info->core_max; idx++) {
+ÂÂÂÂÂÂÂ if (priv->core_mask & BIT(idx)) {
+ÂÂÂÂÂÂÂÂÂÂÂ if (core_channel == found)
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ break;
+
+ÂÂÂÂÂÂÂÂÂÂÂ found++;
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ return idx;

What if nothing is found ?


Core temperature group will be registered only when it detects at least one core checked by check_resolved_cores(), so find_core_index() can be called only when priv->core_mask has a non-zero value. The 'nothing is found' case will not happen.

That doesn't guarantee a match. If what you are saying is correct there should always be
a well defined match of channel -> idx, and the search should be unnecessary.


There could be some disabled cores in the resolved core mask bit sequence also it should remove indexing gap in channel numbering so it is the reason why this search function is needed. Well defined match of channel -> idx would not be always satisfied.

Are you saying that each call to the function, with the same parameters,
can return a different result ?

+}
+
+static int cputemp_read_string(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, const char **str)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int core_index;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_label:
+ÂÂÂÂÂÂÂ if (channel < DEFAULT_CHANNEL_NUMS) {
+ÂÂÂÂÂÂÂÂÂÂÂ *str = cputemp_label[channel];
+ÂÂÂÂÂÂÂ } else {
+ÂÂÂÂÂÂÂÂÂÂÂ core_index = find_core_index(priv, channel);

FWIW, it might be better to pass channel - DEFAULT_CHANNEL_NUMS
as parameter.


cputemp_read_string() is mapped to read_string member of hwmon_ops struct, so hwmon susbsystem passes the channel parameter based on the registered channel order. Should I modify hwmon subsystem code?


Huh ? Changing
ÂÂÂÂÂf(x) { y = x - const; }
...
ÂÂÂÂÂf(x);

to
ÂÂÂÂÂf(y) { }
...
ÂÂÂÂÂf(x - const);

requires a hwmon core change ? Really ?


Sorry for my misunderstanding. You are right. I'll change the parameter passing of find_core_index() from 'channel' to 'channel - DEFAULT_CHANNEL_NUMS'.

What if find_core_index() returns priv->gen_info->core_max, ie
if it didn't find a core ?


As explained above, find_core index() returns a correct index always.

+ÂÂÂÂÂÂÂÂÂÂÂ *str = cputemp_label[DEFAULT_CHANNEL_NUMS + core_index];
+ÂÂÂÂÂÂÂ }
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int cputemp_read_die(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_die_temp(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.die.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_max:
+ÂÂÂÂÂÂÂ rc = get_tcontrol(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tcontrol.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_crit:
+ÂÂÂÂÂÂÂ rc = get_tjmax(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tjmax.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_crit_hyst:
+ÂÂÂÂÂÂÂ rc = get_tcontrol(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tjmax.value - priv->temp.tcontrol.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int cputemp_read_dts_margin(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_dts_margin(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.dts_margin.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_min:
+ÂÂÂÂÂÂÂ *val = 0;
+ÂÂÂÂÂÂÂ return 0;

This attribute should not exist.


This is an attribute of DTS margin temperature which reflects thermal margin to Tcontrol of the CPU package. If it shows '0' means it reached to Tcontrol, the first level of thermal warning. If the CPU keeps getting hot then this DTS margin shows a negative value until it reaches to Tjmax. When the temperature reaches to Tjmax at last then it shows the lower critcal value which lcrit indicates as the second level of thermal warning.


The hwmon ABI reports chip values, not constants. Even though some drivers do
it, reporting a constant is always wrong.

+ÂÂÂ case hwmon_temp_lcrit:
+ÂÂÂÂÂÂÂ rc = get_tcontrol(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tcontrol.value - priv->temp.tjmax.value;

lcrit is tcontrol - tjmax, and crit_hyst above is
tjmax - tcontrol ? How does this make sense ?


Both Tjmax and Tcontrol have positive values and Tjmax is greater than Tcontrol always. As explained above, lcrit of DTS margin should show a negative value means the margin goes down across '0'. On the other hand, crit_hyst of Die temperature should show absolute hyterisis value between Tcontrol and Tjmax.

The hwmon ABI requires reporting of absolute temperatures in milli-degrees C.
Your statements make it very clear that this driver does not report
absolute temperatures. This is not acceptable.


Okay. I'll remove the 'DTS margin' temperature. All others are reporting absolute temperatures.

+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int cputemp_read_tcontrol(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_tcontrol(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tcontrol.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_crit:
+ÂÂÂÂÂÂÂ rc = get_tjmax(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tjmax.value;
+ÂÂÂÂÂÂÂ return 0;

Am I missing something, or is the same temperature reported several times ?
tjmax is also reported as temp_crit cputemp_read_die(), for example.


This driver provides multiple channels and each channel has its own supplement attributes. As you mentioned, Die temperature channel and Core temperature channel have their individual crit attributes and they reflect the same value, Tjmax. It is not reporting several times but reporting the same value.

Then maybe fold the functions accordingly ?


I'll use a single function for 'Die temperature' and 'Core temperature' that have the same attributes set. It would simplify this code a bit.

+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int cputemp_read_tthrottle(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_tthrottle(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tthrottle.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int cputemp_read_tjmax(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_tjmax(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tjmax.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int cputemp_read_core(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_cputemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int core_index = find_core_index(priv, channel);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_core_temp(priv, core_index);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.core[core_index].value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_max:
+ÂÂÂÂÂÂÂ rc = get_tcontrol(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tcontrol.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_crit:
+ÂÂÂÂÂÂÂ rc = get_tjmax(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tjmax.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ case hwmon_temp_crit_hyst:
+ÂÂÂÂÂÂÂ rc = get_tcontrol(priv);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp.tjmax.value - priv->temp.tcontrol.value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}

There is again a lot of duplication in those functions.


Each function is called from cputemp_read() which is mapped to read function pointer of hwmon_ops struct. Since each channel has different set of attributes so the cputemp_read() calls an individual channel handler after checking the channel type. Of course, we can handle all attributes of all channels in a single function but the way also needs channel type checking code on each attribute.

+
+static int cputemp_read(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ switch (channel) {
+ÂÂÂ case channel_die:
+ÂÂÂÂÂÂÂ return cputemp_read_die(dev, type, attr, channel, val);
+ÂÂÂ case channel_dts_mrgn:
+ÂÂÂÂÂÂÂ return cputemp_read_dts_margin(dev, type, attr, channel, val);
+ÂÂÂ case channel_tcontrol:
+ÂÂÂÂÂÂÂ return cputemp_read_tcontrol(dev, type, attr, channel, val);
+ÂÂÂ case channel_tthrottle:
+ÂÂÂÂÂÂÂ return cputemp_read_tthrottle(dev, type, attr, channel, val);
+ÂÂÂ case channel_tjmax:
+ÂÂÂÂÂÂÂ return cputemp_read_tjmax(dev, type, attr, channel, val);
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ if (channel < CPUTEMP_CHANNEL_NUMS)
+ÂÂÂÂÂÂÂÂÂÂÂ return cputemp_read_core(dev, type, attr, channel, val);
+
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static umode_t cputemp_is_visible(const void *data,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel)
+{
+ÂÂÂ const struct peci_cputemp *priv = data;
+
+ÂÂÂ if (priv->temp_config[channel] & BIT(attr))
+ÂÂÂÂÂÂÂ return 0444;
+
+ÂÂÂ return 0;
+}
+
+static const struct hwmon_ops cputemp_ops = {
+ÂÂÂ .is_visible = cputemp_is_visible,
+ÂÂÂ .read_string = cputemp_read_string,
+ÂÂÂ .read = cputemp_read,
+};
+
+static int check_resolved_cores(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_rd_pci_cfg_local_msg msg;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!(priv->client->adapter->cmd_mask & BIT(PECI_CMD_RD_PCI_CFG_LOCAL)))
+ÂÂÂÂÂÂÂ return -EINVAL;
+
+ÂÂÂ /* Get the RESOLVED_CORES register value */
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.bus = 1;
+ÂÂÂ msg.device = 30;
+ÂÂÂ msg.function = 3;
+ÂÂÂ msg.reg = 0xB4;

Can this be made less magic with some defines ?


Sure, will use defines instead.

+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PCI_CFG_LOCAL, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ priv->core_mask = msg.pci_config[3] << 24 |
+ÂÂÂÂÂÂÂÂÂÂÂÂÂ msg.pci_config[2] << 16 |
+ÂÂÂÂÂÂÂÂÂÂÂÂÂ msg.pci_config[1] << 8 |
+ÂÂÂÂÂÂÂÂÂÂÂÂÂ msg.pci_config[0];
+
+ÂÂÂ if (!priv->core_mask)
+ÂÂÂÂÂÂÂ return -EAGAIN;
+
+ÂÂÂ dev_dbg(priv->dev, "Scanned resolved cores: 0x%x\n", priv->core_mask);
+ÂÂÂ return 0;
+}
+
+static int create_core_temp_info(struct peci_cputemp *priv)
+{
+ÂÂÂ int rc, i;
+
+ÂÂÂ rc = check_resolved_cores(priv);
+ÂÂÂ if (!rc) {
+ÂÂÂÂÂÂÂ for (i = 0; i < priv->gen_info->core_max; i++) {
+ÂÂÂÂÂÂÂÂÂÂÂ if (priv->core_mask & BIT(i)) {
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv->temp_config[priv->config_idx++] =
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ config_table[channel_core];
+ÂÂÂÂÂÂÂÂÂÂÂ }
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ return rc;
+}
+
+static int check_cpu_id(struct peci_cputemp *priv)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ u32 cpu_id;
+ÂÂÂ int i, rc;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_CPU_ID;
+ÂÂÂ msg.param = PKG_ID_CPU_ID;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ cpu_id = ((msg.pkg_config[2] << 16) | (msg.pkg_config[1] << 8) |
+ÂÂÂÂÂÂÂÂÂ msg.pkg_config[0]) & CLIENT_CPU_ID_MASK;
+
+ÂÂÂ for (i = 0; i < CPU_GEN_MAX; i++) {
+ÂÂÂÂÂÂÂ if (cpu_id == cpu_gen_info_table[i].cpu_id) {
+ÂÂÂÂÂÂÂÂÂÂÂ priv->gen_info = &cpu_gen_info_table[i];
+ÂÂÂÂÂÂÂÂÂÂÂ break;
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ if (!priv->gen_info)
+ÂÂÂÂÂÂÂ return -ENODEV;
+
+ÂÂÂ dev_dbg(priv->dev, "CPU_ID: 0x%x\n", cpu_id);
+ÂÂÂ return 0;
+}
+
+static int peci_cputemp_probe(struct peci_client *client)
+{
+ÂÂÂ struct device *dev = &client->dev;
+ÂÂÂ struct peci_cputemp *priv;
+ÂÂÂ struct device *hwmon_dev;
+ÂÂÂ int rc;
+
+ÂÂÂ if ((client->adapter->cmd_mask &
+ÂÂÂÂÂÂÂ (BIT(PECI_CMD_GET_TEMP) | BIT(PECI_CMD_RD_PKG_CFG))) !=
+ÂÂÂÂÂÂÂ (BIT(PECI_CMD_GET_TEMP) | BIT(PECI_CMD_RD_PKG_CFG))) {
+ÂÂÂÂÂÂÂ dev_err(dev, "Client doesn't support temperature monitoring\n");
+ÂÂÂÂÂÂÂ return -EINVAL;

Does this mean there will be an error message for each non-supported CPU ?
Why ?


For proper operation of this driver, PECI_CMD_GET_TEMP and PECI_CMD_RD_PKG_CFG have to be supported by a client CPU. PECI_CMD_GET_TEMP is provided as a default command but PECI_CMD_RD_PKG_CFG depends on PECI minor revision of a CPU package so this checking is needed.


I do not question the check. I question the error message and error return value.
Why is it an _error_ if the CPU does not support the functionality, and why does
it have to be reported in the kernel log ?


Got it. I'll change that to dev_dbg.

+ÂÂÂ }
+
+ÂÂÂ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ÂÂÂ if (!priv)
+ÂÂÂÂÂÂÂ return -ENOMEM;
+
+ÂÂÂ dev_set_drvdata(dev, priv);
+ÂÂÂ priv->client = client;
+ÂÂÂ priv->dev = dev;
+ÂÂÂ priv->addr = client->addr;
+ÂÂÂ priv->cpu_no = priv->addr - PECI_BASE_ADDR;
+
+ÂÂÂ snprintf(priv->name, PECI_NAME_SIZE, "peci_cputemp.cpu%d",
+ÂÂÂÂÂÂÂÂ priv->cpu_no);
+
+ÂÂÂ rc = check_cpu_id(priv);
+ÂÂÂ if (rc) {
+ÂÂÂÂÂÂÂ dev_err(dev, "Client CPU is not supported\n");

-ENODEV is not an error, and should not result in an error message.
Besides, the error can also be propagated from peci core code,
and may well be something else.


Got it. I'll remove the error message and will add a proper handling code into PECI core.

+ÂÂÂÂÂÂÂ return rc;
+ÂÂÂ }
+
+ÂÂÂ priv->temp_config[priv->config_idx++] = config_table[channel_die];
+ÂÂÂ priv->temp_config[priv->config_idx++] = config_table[channel_dts_mrgn];
+ÂÂÂ priv->temp_config[priv->config_idx++] = config_table[channel_tcontrol];
+ÂÂÂ priv->temp_config[priv->config_idx++] = config_table[channel_tthrottle];
+ÂÂÂ priv->temp_config[priv->config_idx++] = config_table[channel_tjmax];
+
+ÂÂÂ rc = create_core_temp_info(priv);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ dev_dbg(dev, "Failed to create core temp info\n");

Then what ? Shouldn't this result in probe deferral or something more useful
instead of just being ignored ?


This driver can't support core temperature monitoring if a CPU doesn't support PECI_CMD_RD_PCI_CFG_LOCAL command. In that case, it skips core temperature group creation and supports only basic temperature monitoring of Die, DTS margin and etc. I'll add this description as a comment.


The message says "Failed to ...". It does not say "This CPU does not support ...".


Got it. Will correct the message.

+
+ÂÂÂ priv->chip.ops = &cputemp_ops;
+ÂÂÂ priv->chip.info = priv->info;
+
+ÂÂÂ priv->info[0] = &priv->temp_info;
+
+ÂÂÂ priv->temp_info.type = hwmon_temp;
+ÂÂÂ priv->temp_info.config = priv->temp_config;
+
+ÂÂÂ hwmon_dev = devm_hwmon_device_register_with_info(priv->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv->name,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ &priv->chip,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ NULL);
+
+ÂÂÂ if (IS_ERR(hwmon_dev))
+ÂÂÂÂÂÂÂ return PTR_ERR(hwmon_dev);
+
+ÂÂÂ dev_dbg(dev, "%s: sensor '%s'\n", dev_name(hwmon_dev), priv->name);
+

Why does this message display the device name twice ?


For an example, dev_name(hwmon_dev) shows 'hwmon5' and priv->name shows 'peci-cputemp0'.

And dev_dbg() shows another device name. So you'll have something like

peci-cputemp0: hwmon5: sensor 'peci-cputemp0'

+ÂÂÂ return 0;
+}
+
+static const struct of_device_id peci_cputemp_of_table[] = {
+ÂÂÂ { .compatible = "intel,peci-cputemp" },
+ÂÂÂ { }
+};
+MODULE_DEVICE_TABLE(of, peci_cputemp_of_table);
+
+static struct peci_driver peci_cputemp_driver = {
+ .probe = peci_cputemp_probe,
+ÂÂÂ .driver = {
+ÂÂÂÂÂÂÂ .nameÂÂÂÂÂÂÂÂÂÂ = "peci-cputemp",
+ÂÂÂÂÂÂÂ .of_match_table = of_match_ptr(peci_cputemp_of_table),
+ÂÂÂ },
+};
+module_peci_driver(peci_cputemp_driver);
+
+MODULE_AUTHOR("Jae Hyun Yoo <jae.hyun.yoo@xxxxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("PECI cputemp driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/peci-dimmtemp.c b/drivers/hwmon/peci-dimmtemp.c
new file mode 100644
index 000000000000..78bf29cb2c4c
--- /dev/null
+++ b/drivers/hwmon/peci-dimmtemp.c

FWIW, this should be two separate patches.


Should I split out hwmon documents and dt bindings too?

@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Intel Corporation
+
+#include <linux/delay.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>

Needed ?


No. Will drop the line.

+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/peci.h>
+#include <linux/workqueue.h>
+
+#define TEMP_TYPE_PECIÂÂÂÂÂÂ 6Â /* Sensor type 6: Intel PECI */
+
+#define CHAN_RANK_MAX_ON_HSX 8Â /* Max number of channel ranks on Haswell */
+#define DIMM_IDX_MAX_ON_HSXÂ 3Â /* Max DIMM index per channel on Haswell */
+
+#define CHAN_RANK_MAX_ON_BDX 4Â /* Max number of channel ranks on Broadwell */
+#define DIMM_IDX_MAX_ON_BDXÂ 3Â /* Max DIMM index per channel on Broadwell */
+
+#define CHAN_RANK_MAX_ON_SKX 6Â /* Max number of channel ranks on Skylake */
+#define DIMM_IDX_MAX_ON_SKXÂ 2Â /* Max DIMM index per channel on Skylake */
+
+#define CHAN_RANK_MAXÂÂÂÂÂÂÂ CHAN_RANK_MAX_ON_HSX
+#define DIMM_IDX_MAXÂÂÂÂÂÂÂÂ DIMM_IDX_MAX_ON_HSX
+
+#define DIMM_NUMS_MAXÂÂÂÂÂÂÂ (CHAN_RANK_MAX * DIMM_IDX_MAX)
+
+#define CLIENT_CPU_ID_MASKÂÂ 0xf0ff0Â /* Mask for Family / Model info */
+
+#define UPDATE_INTERVAL_MINÂ HZ
+
+#define DIMM_MASK_CHECK_DELAY_JIFFIES msecs_to_jiffies(5000)
+#define DIMM_MASK_CHECK_RETRY_MAXÂÂÂÂ 60 /* 60 x 5 secs = 5 minutes */
+
+enum cpu_gens {
+ÂÂÂ CPU_GEN_HSX, /* Haswell Xeon */
+ÂÂÂ CPU_GEN_BRX, /* Broadwell Xeon */
+ÂÂÂ CPU_GEN_SKX, /* Skylake Xeon */
+ÂÂÂ CPU_GEN_MAX
+};
+
+struct cpu_gen_info {
+ÂÂÂ u32 type;
+ÂÂÂ u32 cpu_id;
+ÂÂÂ u32 chan_rank_max;
+ÂÂÂ u32 dimm_idx_max;
+};
+
+struct temp_data {
+ÂÂÂ bool valid;
+ÂÂÂ s32Â value;
+ÂÂÂ unsigned long last_updated;
+};
+
+struct peci_dimmtemp {
+ÂÂÂ struct peci_client *client;
+ÂÂÂ struct device *dev;
+ÂÂÂ struct workqueue_struct *work_queue;
+ÂÂÂ struct delayed_work work_handler;
+ÂÂÂ char name[PECI_NAME_SIZE];
+ÂÂÂ struct temp_data temp[DIMM_NUMS_MAX];
+ÂÂÂ u8 addr;
+ÂÂÂ uint cpu_no;
+ÂÂÂ const struct cpu_gen_info *gen_info;
+ÂÂÂ u32 dimm_mask;
+ÂÂÂ int retry_count;
+ÂÂÂ int channels;
+ÂÂÂ u32 temp_config[DIMM_NUMS_MAX + 1];
+ÂÂÂ struct hwmon_channel_info temp_info;
+ÂÂÂ const struct hwmon_channel_info *info[2];
+ÂÂÂ struct hwmon_chip_info chip;
+};
+
+static const struct cpu_gen_info cpu_gen_info_table[] = {
+ { .type = CPU_GEN_HSX,
+ÂÂÂÂÂ .cpu_id = 0x306f0, /* Family code: 6, Model number: 63 (0x3f) */
+ÂÂÂÂÂ .chan_rank_max = CHAN_RANK_MAX_ON_HSX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_HSX },
+ { .type = CPU_GEN_BRX,
+ÂÂÂÂÂ .cpu_id = 0x406f0, /* Family code: 6, Model number: 79 (0x4f) */
+ÂÂÂÂÂ .chan_rank_max = CHAN_RANK_MAX_ON_BDX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_BDX },
+ { .type = CPU_GEN_SKX,
+ÂÂÂÂÂ .cpu_id = 0x50650, /* Family code: 6, Model number: 85 (0x55) */
+ÂÂÂÂÂ .chan_rank_max = CHAN_RANK_MAX_ON_SKX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_SKX },
+};
+
+static const char *dimmtemp_label[CHAN_RANK_MAX][DIMM_IDX_MAX] = {
+ÂÂÂ { "DIMM A0", "DIMM A1", "DIMM A2" },
+ÂÂÂ { "DIMM B0", "DIMM B1", "DIMM B2" },
+ÂÂÂ { "DIMM C0", "DIMM C1", "DIMM C2" },
+ÂÂÂ { "DIMM D0", "DIMM D1", "DIMM D2" },
+ÂÂÂ { "DIMM E0", "DIMM E1", "DIMM E2" },
+ÂÂÂ { "DIMM F0", "DIMM F1", "DIMM F2" },
+ÂÂÂ { "DIMM G0", "DIMM G1", "DIMM G2" },
+ÂÂÂ { "DIMM H0", "DIMM H1", "DIMM H2" },
+};
+
+static int send_peci_cmd(struct peci_dimmtemp *priv, enum peci_cmd cmd,
+ÂÂÂÂÂÂÂÂÂÂÂÂ void *msg)
+{
+ÂÂÂ return peci_command(priv->client->adapter, cmd, msg);
+}
+
+static int need_update(struct temp_data *temp)
+{
+ÂÂÂ if (temp->valid &&
+ÂÂÂÂÂÂÂ time_before(jiffies, temp->last_updated + UPDATE_INTERVAL_MIN))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ return 1;
+}
+
+static void mark_updated(struct temp_data *temp)
+{
+ÂÂÂ temp->valid = true;
+ÂÂÂ temp->last_updated = jiffies;
+}

It might make sense to provide the duplicate functions in a core file.


It is temperature monitoring specific function and it touches module specific variables. Do you really think that this non-generic function should be moved to PECI core?

+
+static int get_dimm_temp(struct peci_dimmtemp *priv, int dimm_no)
+{
+ÂÂÂ int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+ÂÂÂ int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ int rc;
+
+ÂÂÂ if (!need_update(&priv->temp[dimm_no]))
+ÂÂÂÂÂÂÂ return 0;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_DDR_DIMM_TEMP;
+ÂÂÂ msg.param = chan_rank;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ priv->temp[dimm_no].value = msg.pkg_config[dimm_order] * 1000;
+
+ÂÂÂ mark_updated(&priv->temp[dimm_no]);
+
+ÂÂÂ return 0;
+}
+
+static int find_dimm_number(struct peci_dimmtemp *priv, int channel)
+{
+ÂÂÂ int dimm_nums_max = priv->gen_info->chan_rank_max *
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv->gen_info->dimm_idx_max;
+ÂÂÂ int idx, found = 0;
+
+ÂÂÂ for (idx = 0; idx < dimm_nums_max; idx++) {
+ÂÂÂÂÂÂÂ if (priv->dimm_mask & BIT(idx)) {
+ÂÂÂÂÂÂÂÂÂÂÂ if (channel == found)
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ break;
+
+ÂÂÂÂÂÂÂÂÂÂÂ found++;
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ return idx;
+}

This again looks like duplicate code.


find_dimm_number()? I'm sure it isn't.

+
+static int dimmtemp_read_string(struct device *dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, const char **str)
+{
+ÂÂÂ struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+ÂÂÂ u32 dimm_idx_max = priv->gen_info->dimm_idx_max;
+ÂÂÂ int dimm_no, chan_rank, dimm_idx;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_label:
+ÂÂÂÂÂÂÂ dimm_no = find_dimm_number(priv, channel);
+ÂÂÂÂÂÂÂ chan_rank = dimm_no / dimm_idx_max;
+ÂÂÂÂÂÂÂ dimm_idx = dimm_no % dimm_idx_max;
+ÂÂÂÂÂÂÂ *str = dimmtemp_label[chan_rank][dimm_idx];
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static int dimmtemp_read(struct device *dev, enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel, long *val)
+{
+ÂÂÂ struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+ÂÂÂ int dimm_no = find_dimm_number(priv, channel);
+ÂÂÂ int rc;
+
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ rc = get_dimm_temp(priv, dimm_no);
+ÂÂÂÂÂÂÂ if (rc)
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂÂÂÂÂ *val = priv->temp[dimm_no].value;
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return -EOPNOTSUPP;
+ÂÂÂ }
+}
+
+static umode_t dimmtemp_is_visible(const void *data,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ enum hwmon_sensor_types type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ u32 attr, int channel)
+{
+ÂÂÂ switch (attr) {
+ÂÂÂ case hwmon_temp_label:
+ÂÂÂ case hwmon_temp_input:
+ÂÂÂÂÂÂÂ return 0444;
+ÂÂÂ default:
+ÂÂÂÂÂÂÂ return 0;
+ÂÂÂ }
+}
+
+static const struct hwmon_ops dimmtemp_ops = {
+ÂÂÂ .is_visible = dimmtemp_is_visible,
+ÂÂÂ .read_string = dimmtemp_read_string,
+ÂÂÂ .read = dimmtemp_read,
+};
+
+static int check_populated_dimms(struct peci_dimmtemp *priv)
+{
+ÂÂÂ u32 chan_rank_max = priv->gen_info->chan_rank_max;
+ÂÂÂ u32 dimm_idx_max = priv->gen_info->dimm_idx_max;
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ int chan_rank, dimm_idx;
+ÂÂÂ int rc, channels = 0;
+
+ÂÂÂ for (chan_rank = 0; chan_rank < chan_rank_max; chan_rank++) {
+ÂÂÂÂÂÂÂ msg.addr = priv->addr;
+ÂÂÂÂÂÂÂ msg.index = MBX_INDEX_DDR_DIMM_TEMP;
+ÂÂÂÂÂÂÂ msg.param = chan_rank;
+ÂÂÂÂÂÂÂ msg.rx_len = 4;
+
+ÂÂÂÂÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂÂÂÂÂ if (rc) {
+ÂÂÂÂÂÂÂÂÂÂÂ priv->dimm_mask = 0;
+ÂÂÂÂÂÂÂÂÂÂÂ return rc;
+ÂÂÂÂÂÂÂ }
+
+ÂÂÂÂÂÂÂ for (dimm_idx = 0; dimm_idx < dimm_idx_max; dimm_idx++) {
+ÂÂÂÂÂÂÂÂÂÂÂ if (msg.pkg_config[dimm_idx]) {
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv->dimm_mask |= BIT(chan_rank *
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ chan_rank_max +
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ dimm_idx);
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ channels++;
+ÂÂÂÂÂÂÂÂÂÂÂ }
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ if (!priv->dimm_mask)
+ÂÂÂÂÂÂÂ return -EAGAIN;
+
+ÂÂÂ priv->channels = channels;
+
+ÂÂÂ dev_dbg(priv->dev, "Scanned populated DIMMs: 0x%x\n", priv->dimm_mask);
+ÂÂÂ return 0;
+}
+
+static int create_dimm_temp_info(struct peci_dimmtemp *priv)
+{
+ÂÂÂ struct device *hwmon_dev;
+ÂÂÂ int rc, i;
+
+ÂÂÂ rc = check_populated_dimms(priv);
+ÂÂÂ if (!rc) {

Please handle error cases first.


Sure, I'll rewrite it.

+ÂÂÂÂÂÂÂ for (i = 0; i < priv->channels; i++)
+ÂÂÂÂÂÂÂÂÂÂÂ priv->temp_config[i] = HWMON_T_LABEL | HWMON_T_INPUT;
+
+ÂÂÂÂÂÂÂ priv->chip.ops = &dimmtemp_ops;
+ÂÂÂÂÂÂÂ priv->chip.info = priv->info;
+
+ÂÂÂÂÂÂÂ priv->info[0] = &priv->temp_info;
+
+ÂÂÂÂÂÂÂ priv->temp_info.type = hwmon_temp;
+ÂÂÂÂÂÂÂ priv->temp_info.config = priv->temp_config;
+
+ÂÂÂÂÂÂÂ hwmon_dev = devm_hwmon_device_register_with_info(priv->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv->name,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ priv,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ &priv->chip,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ NULL);
+ÂÂÂÂÂÂÂ rc = PTR_ERR_OR_ZERO(hwmon_dev);
+ÂÂÂÂÂÂÂ if (!rc)
+ÂÂÂÂÂÂÂÂÂÂÂ dev_dbg(priv->dev, "%s: sensor '%s'\n",
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ dev_name(hwmon_dev), priv->name);
+ÂÂÂ } else if (rc == -EAGAIN) {
+ÂÂÂÂÂÂÂ if (priv->retry_count < DIMM_MASK_CHECK_RETRY_MAX) {
+ÂÂÂÂÂÂÂÂÂÂÂ queue_delayed_work(priv->work_queue,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ &priv->work_handler,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ DIMM_MASK_CHECK_DELAY_JIFFIES);
+ÂÂÂÂÂÂÂÂÂÂÂ priv->retry_count++;
+ÂÂÂÂÂÂÂÂÂÂÂ dev_dbg(priv->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Deferred DIMM temp info creation\n");
+ÂÂÂÂÂÂÂ } else {
+ÂÂÂÂÂÂÂÂÂÂÂ rc = -ETIMEDOUT;
+ÂÂÂÂÂÂÂÂÂÂÂ dev_err(priv->dev,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ "Timeout retrying DIMM temp info creation\n");
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ return rc;
+}
+
+static void create_dimm_temp_info_delayed(struct work_struct *work)
+{
+ÂÂÂ struct delayed_work *dwork = to_delayed_work(work);
+ÂÂÂ struct peci_dimmtemp *priv = container_of(dwork, struct peci_dimmtemp,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ work_handler);
+ÂÂÂ int rc;
+
+ÂÂÂ rc = create_dimm_temp_info(priv);
+ÂÂÂ if (rc && rc != -EAGAIN)
+ÂÂÂÂÂÂÂ dev_dbg(priv->dev, "Failed to create DIMM temp info\n");
+}
+
+static int check_cpu_id(struct peci_dimmtemp *priv)
+{
+ÂÂÂ struct peci_rd_pkg_cfg_msg msg;
+ÂÂÂ u32 cpu_id;
+ÂÂÂ int i, rc;
+
+ÂÂÂ msg.addr = priv->addr;
+ÂÂÂ msg.index = MBX_INDEX_CPU_ID;
+ÂÂÂ msg.param = PKG_ID_CPU_ID;
+ÂÂÂ msg.rx_len = 4;
+
+ÂÂÂ rc = send_peci_cmd(priv, PECI_CMD_RD_PKG_CFG, &msg);
+ÂÂÂ if (rc)
+ÂÂÂÂÂÂÂ return rc;
+
+ÂÂÂ cpu_id = ((msg.pkg_config[2] << 16) | (msg.pkg_config[1] << 8) |
+ÂÂÂÂÂÂÂÂÂ msg.pkg_config[0]) & CLIENT_CPU_ID_MASK;
+
+ÂÂÂ for (i = 0; i < CPU_GEN_MAX; i++) {
+ÂÂÂÂÂÂÂ if (cpu_id == cpu_gen_info_table[i].cpu_id) {
+ÂÂÂÂÂÂÂÂÂÂÂ priv->gen_info = &cpu_gen_info_table[i];
+ÂÂÂÂÂÂÂÂÂÂÂ break;
+ÂÂÂÂÂÂÂ }
+ÂÂÂ }
+
+ÂÂÂ if (!priv->gen_info)
+ÂÂÂÂÂÂÂ return -ENODEV;
+
+ÂÂÂ dev_dbg(priv->dev, "CPU_ID: 0x%x\n", cpu_id);
+ÂÂÂ return 0;
+}

More duplicate code.


Okay. In case of check_cpu_id(), it could be used as a generic PECI function. I'll move it into PECI core.

+
+static int peci_dimmtemp_probe(struct peci_client *client)
+{
+ÂÂÂ struct device *dev = &client->dev;
+ÂÂÂ struct peci_dimmtemp *priv;
+ÂÂÂ int rc;
+
+ÂÂÂ if ((client->adapter->cmd_mask &
+ÂÂÂÂÂÂÂ (BIT(PECI_CMD_GET_TEMP) | BIT(PECI_CMD_RD_PKG_CFG))) !=
+ÂÂÂÂÂÂÂ (BIT(PECI_CMD_GET_TEMP) | BIT(PECI_CMD_RD_PKG_CFG))) {

One set of ( ) is unnecessary on each side of the expression.


'&' has a precedence over '!=' but '|' doesn't. I'll rewrite it to:


Actually, that is wrong. You refer to address-of. Bit operations do have lower
precedence that comparisons. I stand corrected.

ÂÂÂÂÂif (client->adapter->cmd_mask &
ÂÂÂÂÂÂÂÂ (BIT(PECI_CMD_GET_TEMP) | BIT(PECI_CMD_RD_PKG_CFG)) !=
ÂÂÂÂÂÂÂÂ (BIT(PECI_CMD_GET_TEMP) | BIT(PECI_CMD_RD_PKG_CFG)))

+ÂÂÂÂÂÂÂ dev_err(dev, "Client doesn't support temperature monitoring\n");
+ÂÂÂÂÂÂÂ return -EINVAL;

Why is this "invalid", and why does it warrant an error message ?


Should I use -EPERM? Any suggestion?


Is it an _error_ if the CPU does not support this functionality ?


Actually, it returns from this probe() function without making any hwmon info creation so I intended to handle this case as an error. Am I wrong?


If the functionality or HW supported by the driver isn't available, it is customary
to return -ENODEV and no error message. Otherwise the kernel log would drown in
"not supported" error messages. I don't see where it would add any value to handle
this driver differently.

EINVAL Invalid argument
EPERM Operation not permitted

You'll have to work hard to convince me that any of those makes sense, and that

ENODEV No such device

doesn't. More specifically, if EINVAL makes sense, the caller did something wrong,
meaning there is a problem in the infrastructure which should get fixed.
The same is true for EPERM.

+ÂÂÂ }
+
+ÂÂÂ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ÂÂÂ if (!priv)
+ÂÂÂÂÂÂÂ return -ENOMEM;
+
+ÂÂÂ dev_set_drvdata(dev, priv);
+ÂÂÂ priv->client = client;
+ÂÂÂ priv->dev = dev;
+ÂÂÂ priv->addr = client->addr;
+ÂÂÂ priv->cpu_no = priv->addr - PECI_BASE_ADDR;

Is priv->addr guaranteed to be >= PECI_BASE_ADDR ?

Client address range validation will be done in peci_check_addr_validity() in PECI core before probing a device driver.

+
+ÂÂÂ snprintf(priv->name, PECI_NAME_SIZE, "peci_dimmtemp.cpu%d",
+ÂÂÂÂÂÂÂÂ priv->cpu_no);
+
+ÂÂÂ rc = check_cpu_id(priv);
+ÂÂÂ if (rc) {
+ÂÂÂÂÂÂÂ dev_err(dev, "Client CPU is not supported\n");

Or the peci command failed.


I'll remove the error message and will add a proper handling code into PECI core on each error type.

+ÂÂÂÂÂÂÂ return rc;
+ÂÂÂ }
+
+ÂÂÂ priv->work_queue = alloc_ordered_workqueue(priv->name, 0);
+ÂÂÂ if (!priv->work_queue)
+ÂÂÂÂÂÂÂ return -ENOMEM;
+
+ÂÂÂ INIT_DELAYED_WORK(&priv->work_handler, create_dimm_temp_info_delayed);
+
+ÂÂÂ rc = create_dimm_temp_info(priv);
+ÂÂÂ if (rc && rc != -EAGAIN) {
+ÂÂÂÂÂÂÂ dev_err(dev, "Failed to create DIMM temp info\n");
+ÂÂÂÂÂÂÂ goto err_free_wq;
+ÂÂÂ }
+
+ÂÂÂ return 0;
+
+err_free_wq:
+ÂÂÂ destroy_workqueue(priv->work_queue);
+ÂÂÂ return rc;
+}
+
+static int peci_dimmtemp_remove(struct peci_client *client)
+{
+ÂÂÂ struct peci_dimmtemp *priv = dev_get_drvdata(&client->dev);
+
+ÂÂÂ cancel_delayed_work(&priv->work_handler);

cancel_delayed_work_sync() ?


Yes, it would be safer. Will fix it.

+ÂÂÂ destroy_workqueue(priv->work_queue);
+
+ÂÂÂ return 0;
+}
+
+static const struct of_device_id peci_dimmtemp_of_table[] = {
+ÂÂÂ { .compatible = "intel,peci-dimmtemp" },
+ÂÂÂ { }
+};
+MODULE_DEVICE_TABLE(of, peci_dimmtemp_of_table);
+
+static struct peci_driver peci_dimmtemp_driver = {
+ .probe = peci_dimmtemp_probe,
+ÂÂÂ .remove = peci_dimmtemp_remove,
+ÂÂÂ .driver = {
+ÂÂÂÂÂÂÂ .nameÂÂÂÂÂÂÂÂÂÂ = "peci-dimmtemp",
+ÂÂÂÂÂÂÂ .of_match_table = of_match_ptr(peci_dimmtemp_of_table),
+ÂÂÂ },
+};
+module_peci_driver(peci_dimmtemp_driver);
+
+MODULE_AUTHOR("Jae Hyun Yoo <jae.hyun.yoo@xxxxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("PECI dimmtemp driver");
+MODULE_LICENSE("GPL v2");
--
2.16.2

--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html