Re: [lm-sensors] [PATCH] hwmon: Add driver for VIA CPU core temperature
From: Juerg Haefliger
Date: Sat Jun 27 2009 - 14:40:59 EST
Hi Jean,
> Hi Harald,
>
> On Tue, 9 Jun 2009 16:34:06 +0800, Harald Welte wrote:
>> This is a driver for the on-die digital temperature sensor of
>> VIA's recent CPU models.
>>
>> Signed-off-by: Harald Welte <HaraldWelte@xxxxxxxxxxx>
>> ---
>> Âdrivers/hwmon/Kconfig    |  Â8 +
>> Âdrivers/hwmon/Makefile   Â|  Â1 +
>> Âdrivers/hwmon/via-cputemp.c | Â354 +++++++++++++++++++++++++++++++++++++++++++
>> Â3 files changed, 363 insertions(+), 0 deletions(-)
>> Âcreate mode 100644 drivers/hwmon/via-cputemp.c
>
> Interesting. I'm adding Juerg Haefliger in Cc. A few months ago, Juerg
> submitted a driver named "c7temp" doing basically the same, with
> mitigated success. I seem to remember that Juerg's driver was also
> displaying either the VID or Vcore for the CPU. We don't want to have
> two drivers for the same hardware, so let's merge everything in one
> driver and push this upstream.
>
> Juerg, you were there first, so I'd like to hear you on this. Are there
> differences between Harald'd driver and yours? Which one should I pick?
Personally I don't care which driver you pick as long as we get one
into mainline eventually. Just a few comments:
1) My driver uses the CPUID instruction to read the performance
registers that contain the temp and voltage data. Harald's driver
reads MSRs. I don't know if there are any benefits of using one method
over the other.
2) If we pick Harald's, it would be nice if his driver can also read
and export the CPU core voltage.
3) Quite a few testers of my driver reported 0 temp readings for some
C7 CPUs. I was never able to figure out why some CPUs return 0 temp
but I'm guessing it depends on the thermal monitor settings. I'd like
to understand what is going on and hope Harald can shed some light.
...juerg
> For now, here's a review of Harald's driver:
>
>>
>> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
>> index d73f5f4..e863833 100644
>> --- a/drivers/hwmon/Kconfig
>> +++ b/drivers/hwmon/Kconfig
>> @@ -787,6 +787,14 @@ config SENSORS_THMC50
>> Â Â Â Â This driver can also be built as a module. ÂIf so, the module
>> Â Â Â Â will be called thmc50.
>>
>> +config SENSORS_VIA_CPUTEMP
>> + Â Â tristate "VIA CPU temperature sensor"
>> + Â Â depends on X86
>> + Â Â help
>> + Â Â Â If you say yes here you get support for the temperature
>> + Â Â Â sensor inside your CPU. Supported all are all known variants
>> + Â Â Â of the VIA C7 and Nano.
>> +
>> Âconfig SENSORS_VIA686A
>> Â Â Â tristate "VIA686A"
>> Â Â Â depends on PCI
>> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
>> index 0ae2698..3edf7fa 100644
>> --- a/drivers/hwmon/Makefile
>> +++ b/drivers/hwmon/Makefile
>> @@ -82,6 +82,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
>> Âobj-$(CONFIG_SENSORS_SMSC47M1) Â Â Â += smsc47m1.o
>> Âobj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
>> Âobj-$(CONFIG_SENSORS_THMC50) += thmc50.o
>> +obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
>> Âobj-$(CONFIG_SENSORS_VIA686A) Â Â Â Â+= via686a.o
>> Âobj-$(CONFIG_SENSORS_VT1211) += vt1211.o
>> Âobj-$(CONFIG_SENSORS_VT8231) += vt8231.o
>> diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c
>> new file mode 100644
>> index 0000000..1032355
>> --- /dev/null
>> +++ b/drivers/hwmon/via-cputemp.c
>> @@ -0,0 +1,354 @@
>> +/*
>> + * via-cputemp.c - Driver for VIA CPU core temperature monitoring
>> + * Copyright (C) 2009 VIA Technologies, Inc.
>> + *
>> + * based on existing coretemp.c, which is
>> + *
>> + * Copyright (C) 2007 Rudolf Marek <r.marek@xxxxxxxxxxxx>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; version 2 of the License.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301 USA.
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/delay.h>
>> +#include <linux/init.h>
>> +#include <linux/slab.h>
>> +#include <linux/jiffies.h>
>> +#include <linux/hwmon.h>
>> +#include <linux/sysfs.h>
>> +#include <linux/hwmon-sysfs.h>
>> +#include <linux/err.h>
>> +#include <linux/mutex.h>
>> +#include <linux/list.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/cpu.h>
>> +#include <asm/msr.h>
>> +#include <asm/processor.h>
>> +
>> +#define DRVNAME Â Â Â"via-cputemp"
>> +
>> +typedef enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME } SHOW;
>
> No typedef in kernel code please, an enum is enough.
>
>> +
>> +/*
>> + * Functions declaration
>> + */
>> +
>> +struct via_cputemp_data {
>> + Â Â struct device *hwmon_dev;
>> + Â Â const char *name;
>> + Â Â u32 id;
>> + Â Â u32 msr;
>> +};
>> +
>> +/*
>> + * Sysfs stuff
>> + */
>> +
>> +static ssize_t show_name(struct device *dev, struct device_attribute
>> + Â Â Â Â Â Â Â Â Â Â Â *devattr, char *buf)
>> +{
>> + Â Â int ret;
>> + Â Â struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> + Â Â struct via_cputemp_data *data = dev_get_drvdata(dev);
>> +
>> + Â Â if (attr->index == SHOW_NAME)
>> + Â Â Â Â Â Â ret = sprintf(buf, "%s\n", data->name);
>> +   else  Â/* show label */
>> + Â Â Â Â Â Â ret = sprintf(buf, "Core %d\n", data->id);
>> + Â Â return ret;
>> +}
>> +
>> +static ssize_t show_temp(struct device *dev,
>> + Â Â Â Â Â Â Â Â Â Â Âstruct device_attribute *devattr, char *buf)
>> +{
>> + Â Â struct via_cputemp_data *data = dev_get_drvdata(dev);
>> + Â Â u32 eax, edx;
>> + Â Â int err;
>> +
>> + Â Â err = rdmsr_safe_on_cpu(data->id, data->msr, &eax, &edx);
>> + Â Â if (err)
>> + Â Â Â Â Â Â return -EAGAIN;
>> +
>> + Â Â err = sprintf(buf, "%d\n", eax & 0xffffff);
>> +
>> + Â Â return err;
>
> return sprintf()... would be clearer, as the returned value is never an
> error in this case.
>
> Also note that in theory the result could overflow once multiplied by
> 1000.
>
>> +}
>> +
>> +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
>> + Â Â Â Â Â Â Â Â Â Â Â SHOW_TEMP);
>> +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
>> +static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
>> +
>> +static struct attribute *via_cputemp_attributes[] = {
>> + Â Â &sensor_dev_attr_name.dev_attr.attr,
>> + Â Â &sensor_dev_attr_temp1_label.dev_attr.attr,
>> + Â Â &sensor_dev_attr_temp1_input.dev_attr.attr,
>> + Â Â NULL
>> +};
>> +
>> +static const struct attribute_group via_cputemp_group = {
>> + Â Â .attrs = via_cputemp_attributes,
>> +};
>> +
>> +static int __devinit via_cputemp_probe(struct platform_device *pdev)
>> +{
>> + Â Â struct via_cputemp_data *data;
>> + Â Â struct cpuinfo_x86 *c = &cpu_data(pdev->id);
>> + Â Â int err;
>> + Â Â u32 eax, edx;
>> +
>> + Â Â if (!(data = kzalloc(sizeof(struct via_cputemp_data), GFP_KERNEL))) {
>
> ERROR: do not use assignment in if condition
>
>> + Â Â Â Â Â Â err = -ENOMEM;
>> + Â Â Â Â Â Â dev_err(&pdev->dev, "Out of memory\n");
>> + Â Â Â Â Â Â goto exit;
>> + Â Â }
>> +
>> + Â Â data->id = pdev->id;
>
> pdev->id is an unsigned int, so shouldn't data->id be too?
>
>> + Â Â data->name = "via-cputemp";
>
> This must be made "via_cputemp", as dashes aren't accepted in hwmon
> device names.
>
>> +
>> + Â Â switch (c->x86_model) {
>> + Â Â case 0xA:
>> + Â Â Â Â Â Â /* C7 A */
>> + Â Â case 0xD:
>> + Â Â Â Â Â Â /* C7 D */
>> + Â Â Â Â Â Â data->msr = 0x1169;
>> + Â Â Â Â Â Â break;
>> + Â Â case 0xF:
>> + Â Â Â Â Â Â /* Nano */
>> + Â Â Â Â Â Â data->msr = 0x1423;
>> + Â Â Â Â Â Â break;
>> + Â Â default:
>> + Â Â Â Â Â Â err = -ENODEV;
>> + Â Â Â Â Â Â goto exit_free;
>> + Â Â }
>> +
>> + Â Â /* test if we can access the TEMPERATURE MSR */
>> + Â Â err = rdmsr_safe_on_cpu(data->id, data->msr, &eax, &edx);
>> + Â Â if (err) {
>
> In the runtime read function, you handle errors with -EAGAIN, which
> means transient errors are expected. If such an error happens at
> registration time, we could see random failures. That's confusing for
> the user. I believe you should either skip the test at registration
> (are there really CPU models where it always fails?) or try more than
> once to rule out temporary failures.
>
>> + Â Â Â Â Â Â dev_err(&pdev->dev,
>> + Â Â Â Â Â Â Â Â Â Â "Unable to access TEMPERATURE MSR, giving up\n");
>> + Â Â Â Â Â Â goto exit_free;
>> + Â Â }
>> +
>> + Â Â platform_set_drvdata(pdev, data);
>> +
>> + Â Â if ((err = sysfs_create_group(&pdev->dev.kobj, &via_cputemp_group)))
>
> ERROR: do not use assignment in if condition
>
>> + Â Â Â Â Â Â goto exit_free;
>> +
>> + Â Â data->hwmon_dev = hwmon_device_register(&pdev->dev);
>> + Â Â if (IS_ERR(data->hwmon_dev)) {
>> + Â Â Â Â Â Â err = PTR_ERR(data->hwmon_dev);
>> + Â Â Â Â Â Â dev_err(&pdev->dev, "Class registration failed (%d)\n",
>> + Â Â Â Â Â Â Â Â Â Â err);
>> + Â Â Â Â Â Â goto exit_class;
>> + Â Â }
>> +
>> + Â Â return 0;
>> +
>> +exit_class:
>> + Â Â sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
>> +exit_free:
>
> A platform_set_drvdata(pdev, NULL) would be welcome here.
>
>> + Â Â kfree(data);
>
> This is a little inconsistent, as the first label refers to the last
> action that succeeded and the second label refers to the first cleanup
> step to execute. Renaming exit_class to exit_remove would help.
>
>> +exit:
>> + Â Â return err;
>> +}
>> +
>> +static int __devexit via_cputemp_remove(struct platform_device *pdev)
>> +{
>> + Â Â struct via_cputemp_data *data = platform_get_drvdata(pdev);
>> +
>> + Â Â hwmon_device_unregister(data->hwmon_dev);
>> + Â Â sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
>> + Â Â platform_set_drvdata(pdev, NULL);
>> + Â Â kfree(data);
>> + Â Â return 0;
>> +}
>> +
>> +static struct platform_driver via_cputemp_driver = {
>> + Â Â .driver = {
>> + Â Â Â Â Â Â .owner = THIS_MODULE,
>> + Â Â Â Â Â Â .name = DRVNAME,
>> + Â Â },
>> + Â Â .probe = via_cputemp_probe,
>> + Â Â .remove = __devexit_p(via_cputemp_remove),
>> +};
>> +
>> +struct pdev_entry {
>> + Â Â struct list_head list;
>> + Â Â struct platform_device *pdev;
>> + Â Â unsigned int cpu;
>> +};
>> +
>> +static LIST_HEAD(pdev_list);
>> +static DEFINE_MUTEX(pdev_list_mutex);
>> +
>> +static int __cpuinit via_cputemp_device_add(unsigned int cpu)
>> +{
>> + Â Â int err;
>> + Â Â struct platform_device *pdev;
>> + Â Â struct pdev_entry *pdev_entry;
>> +
>> + Â Â pdev = platform_device_alloc(DRVNAME, cpu);
>> + Â Â if (!pdev) {
>> + Â Â Â Â Â Â err = -ENOMEM;
>> + Â Â Â Â Â Â printk(KERN_ERR DRVNAME ": Device allocation failed\n");
>> + Â Â Â Â Â Â goto exit;
>> + Â Â }
>> +
>> + Â Â pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL);
>> + Â Â if (!pdev_entry) {
>> + Â Â Â Â Â Â err = -ENOMEM;
>> + Â Â Â Â Â Â goto exit_device_put;
>> + Â Â }
>> +
>> + Â Â err = platform_device_add(pdev);
>> + Â Â if (err) {
>> + Â Â Â Â Â Â printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
>> + Â Â Â Â Â Â Â Â Â Âerr);
>> + Â Â Â Â Â Â goto exit_device_free;
>> + Â Â }
>> +
>> + Â Â pdev_entry->pdev = pdev;
>> + Â Â pdev_entry->cpu = cpu;
>> + Â Â mutex_lock(&pdev_list_mutex);
>> + Â Â list_add_tail(&pdev_entry->list, &pdev_list);
>> + Â Â mutex_unlock(&pdev_list_mutex);
>> +
>> + Â Â return 0;
>> +
>> +exit_device_free:
>> + Â Â kfree(pdev_entry);
>> +exit_device_put:
>> + Â Â platform_device_put(pdev);
>> +exit:
>> + Â Â return err;
>> +}
>> +
>> +#ifdef CONFIG_HOTPLUG_CPU
>> +static void via_cputemp_device_remove(unsigned int cpu)
>> +{
>> + Â Â struct pdev_entry *p, *n;
>> + Â Â mutex_lock(&pdev_list_mutex);
>> + Â Â list_for_each_entry_safe(p, n, &pdev_list, list) {
>> + Â Â Â Â Â Â if (p->cpu == cpu) {
>> + Â Â Â Â Â Â Â Â Â Â platform_device_unregister(p->pdev);
>> + Â Â Â Â Â Â Â Â Â Â list_del(&p->list);
>> + Â Â Â Â Â Â Â Â Â Â kfree(p);
>> + Â Â Â Â Â Â }
>> + Â Â }
>> + Â Â mutex_unlock(&pdev_list_mutex);
>> +}
>> +
>> +static int __cpuinit via_cputemp_cpu_callback(struct notifier_block *nfb,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âunsigned long action, void *hcpu)
>> +{
>> + Â Â unsigned int cpu = (unsigned long) hcpu;
>> +
>> + Â Â switch (action) {
>> + Â Â case CPU_ONLINE:
>> + Â Â case CPU_DOWN_FAILED:
>> + Â Â Â Â Â Â via_cputemp_device_add(cpu);
>> + Â Â Â Â Â Â break;
>> + Â Â case CPU_DOWN_PREPARE:
>> + Â Â Â Â Â Â via_cputemp_device_remove(cpu);
>> + Â Â Â Â Â Â break;
>> + Â Â }
>> + Â Â return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block via_cputemp_cpu_notifier __refdata = {
>> + Â Â .notifier_call = via_cputemp_cpu_callback,
>> +};
>> +#endif                /* !CONFIG_HOTPLUG_CPU */
>> +
>> +static int __init via_cputemp_init(void)
>> +{
>> + Â Â int i, err = -ENODEV;
>
> err would be better initialized when the error occurs, for consistency.
>
>> + Â Â struct pdev_entry *p, *n;
>> +
>> + Â Â if (cpu_data(0).x86_vendor != X86_VENDOR_CENTAUR) {
>> + Â Â Â Â Â Â printk(KERN_DEBUG "not a VIA CPU\n");
>
> Missing DRV_NAME.
>
>> + Â Â Â Â Â Â goto exit;
>> + Â Â }
>> +
>> + Â Â err = platform_driver_register(&via_cputemp_driver);
>> + Â Â if (err)
>> + Â Â Â Â Â Â goto exit;
>> +
>> + Â Â for_each_online_cpu(i) {
>> + Â Â Â Â Â Â struct cpuinfo_x86 *c = &cpu_data(i);
>> +
>> + Â Â Â Â Â Â if (c->x86 != 6)
>> + Â Â Â Â Â Â Â Â Â Â continue;
>> +
>> + Â Â Â Â Â Â if (c->x86_model < 0x0a)
>> + Â Â Â Â Â Â Â Â Â Â continue;
>> +
>> + Â Â Â Â Â Â if (c->x86_model > 0x0f) {
>> + Â Â Â Â Â Â Â Â Â Â printk(KERN_WARNING DRVNAME ": Unknown CPU "
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â "model %x\n", c->x86_model);
>
> Please use 0x%x for clarity.
>
>> + Â Â Â Â Â Â Â Â Â Â continue;
>> + Â Â Â Â Â Â }
>> +
>> + Â Â Â Â Â Â err = via_cputemp_device_add(i);
>> + Â Â Â Â Â Â if (err)
>> + Â Â Â Â Â Â Â Â Â Â goto exit_devices_unreg;
>> + Â Â }
>> + Â Â if (list_empty(&pdev_list)) {
>> + Â Â Â Â Â Â err = -ENODEV;
>> + Â Â Â Â Â Â goto exit_driver_unreg;
>> + Â Â }
>> +
>> +#ifdef CONFIG_HOTPLUG_CPU
>> + Â Â register_hotcpu_notifier(&via_cputemp_cpu_notifier);
>> +#endif
>> + Â Â return 0;
>> +
>> +exit_devices_unreg:
>> + Â Â mutex_lock(&pdev_list_mutex);
>> + Â Â list_for_each_entry_safe(p, n, &pdev_list, list) {
>> + Â Â Â Â Â Â platform_device_unregister(p->pdev);
>> + Â Â Â Â Â Â list_del(&p->list);
>> + Â Â Â Â Â Â kfree(p);
>> + Â Â }
>> + Â Â mutex_unlock(&pdev_list_mutex);
>> +exit_driver_unreg:
>> + Â Â platform_driver_unregister(&via_cputemp_driver);
>> +exit:
>> + Â Â return err;
>> +}
>> +
>> +static void __exit via_cputemp_exit(void)
>> +{
>> + Â Â struct pdev_entry *p, *n;
>> +#ifdef CONFIG_HOTPLUG_CPU
>> + Â Â unregister_hotcpu_notifier(&via_cputemp_cpu_notifier);
>> +#endif
>> + Â Â mutex_lock(&pdev_list_mutex);
>> + Â Â list_for_each_entry_safe(p, n, &pdev_list, list) {
>> + Â Â Â Â Â Â platform_device_unregister(p->pdev);
>> + Â Â Â Â Â Â list_del(&p->list);
>> + Â Â Â Â Â Â kfree(p);
>> + Â Â }
>> + Â Â mutex_unlock(&pdev_list_mutex);
>> + Â Â platform_driver_unregister(&via_cputemp_driver);
>> +}
>> +
>> +MODULE_AUTHOR("Harald Welte <HaraldWelte@xxxxxxxxxxx>");
>> +MODULE_DESCRIPTION("VIA CPU temperature monitor");
>> +MODULE_LICENSE("GPL");
>> +
>> +module_init(via_cputemp_init)
>> +module_exit(via_cputemp_exit)
>
> The rest looks OK.
>
> --
> Jean Delvare
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/