Re: [PATCH 3/7] hwmon/coretemp: Handle large core id value

From: Guenter Roeck
Date: Fri Aug 12 2022 - 13:16:09 EST


On Sat, Aug 13, 2022 at 12:41:40AM +0800, Zhang Rui wrote:
> The coretemp driver supports up to a hard-coded limit of 128 cores.
>
> Today, the driver cannot support a core with an id above that limit.
> Yet, the encoding of core_ids is arbitrary (BIOS APIC-id) and so they
> may be sparse and they may be large.
>
> Update the driver to map arbitrary core_id numbers into appropriate
> array indexes so that 128 cores can be supported, no matter the encoding
> of core_ids.
>
> Acked-by: Len Brown <len.brown@xxxxxxxxx>
> Signed-off-by: Zhang Rui <rui.zhang@xxxxxxxxx>
> ---
> drivers/hwmon/coretemp.c | 55 +++++++++++++++++++++++++++++-----------
> 1 file changed, 40 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index ccf0af5b988a..3f0f7d7612ae 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -46,9 +46,6 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
> #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>
> -#define TO_CORE_ID(cpu) (cpu_data(cpu).cpu_core_id)
> -#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
> -
> #ifdef CONFIG_SMP
> #define for_each_sibling(i, cpu) \
> for_each_cpu(i, topology_sibling_cpumask(cpu))
> @@ -91,6 +88,8 @@ struct temp_data {
> struct platform_data {
> struct device *hwmon_dev;
> u16 pkg_id;
> + u16 cpu_map[NUM_REAL_CORES];
> + struct ida ida;
> struct cpumask cpumask;
> struct temp_data *core_data[MAX_CORE_DATA];
> struct device_attribute name_attr;
> @@ -441,7 +440,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
> MSR_IA32_THERM_STATUS;
> tdata->is_pkg_data = pkg_flag;
> tdata->cpu = cpu;
> - tdata->cpu_core_id = TO_CORE_ID(cpu);
> + tdata->cpu_core_id = topology_core_id(cpu);
> tdata->attr_size = MAX_CORE_ATTRS;
> mutex_init(&tdata->update_lock);
> return tdata;
> @@ -454,7 +453,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> struct platform_data *pdata = platform_get_drvdata(pdev);
> struct cpuinfo_x86 *c = &cpu_data(cpu);
> u32 eax, edx;
> - int err, attr_no;
> + int err, index, attr_no;
>
> /*
> * Find attr number for sysfs:
> @@ -462,14 +461,26 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> * The attr number is always core id + 2
> * The Pkgtemp will always show up as temp1_*, if available
> */
> - attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
> + if (pkg_flag)
> + attr_no = PKG_SYSFS_ATTR_NO;
> + else {
> + index = ida_alloc(&pdata->ida, GFP_KERNEL);
> + if (index < 0)
> + return index;
> + pdata->cpu_map[index] = topology_core_id(cpu);
> + attr_no = index + BASE_SYSFS_ATTR_NO;
> + }
>
> - if (attr_no > MAX_CORE_DATA - 1)
> - return -ERANGE;
> + if (attr_no > MAX_CORE_DATA - 1) {
> + err = -ERANGE;
> + goto ida_free;
> + }
>
> tdata = init_temp_data(cpu, pkg_flag);
> - if (!tdata)
> - return -ENOMEM;
> + if (!tdata) {
> + err = -ENOMEM;
> + goto ida_free;
> + }
>
> /* Test if we can access the status register */
> err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
> @@ -505,6 +516,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> exit_free:
> pdata->core_data[attr_no] = NULL;
> kfree(tdata);
> +ida_free:
> + if (!pkg_flag)
> + ida_free(&pdata->ida, index);
> return err;
> }
>
> @@ -524,6 +538,8 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
>
> kfree(pdata->core_data[indx]);
> pdata->core_data[indx] = NULL;
> +
> + ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
> }
>
> static int coretemp_probe(struct platform_device *pdev)
> @@ -537,6 +553,7 @@ static int coretemp_probe(struct platform_device *pdev)
> return -ENOMEM;
>
> pdata->pkg_id = pdev->id;
> + ida_init(&pdata->ida);
> platform_set_drvdata(pdev, pdata);
>
> pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
> @@ -553,6 +570,7 @@ static int coretemp_remove(struct platform_device *pdev)
> if (pdata->core_data[i])
> coretemp_remove_core(pdata, i);
>
> + ida_destroy(&pdata->ida);
> return 0;
> }
>
> @@ -647,7 +665,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
> struct platform_device *pdev = coretemp_get_pdev(cpu);
> struct platform_data *pd;
> struct temp_data *tdata;
> - int indx, target;
> + int i, indx = -1, target;
>
> /*
> * Don't execute this on suspend as the device remove locks
> @@ -660,12 +678,19 @@ static int coretemp_cpu_offline(unsigned int cpu)
> if (!pdev)
> return 0;
>
> - /* The core id is too big, just return */
> - indx = TO_ATTR_NO(cpu);
> - if (indx > MAX_CORE_DATA - 1)
> + pd = platform_get_drvdata(pdev);
> +
> + for (i = 0; i < NUM_REAL_CORES; i++) {
> + if (pd->cpu_map[i] == topology_core_id(cpu)) {
> + indx = i + BASE_SYSFS_ATTR_NO;
> + break;
> + }
> + }
> +
> + /* Too many cores and this core is not pupolated, just return */

Typo: s/pupolated/populated/

Other than that, this looks good.

Acked-by: Guenter Roeck <linux@xxxxxxxxxxxx>

> + if (indx < 0)
> return 0;
>
> - pd = platform_get_drvdata(pdev);
> tdata = pd->core_data[indx];
>
> cpumask_clear_cpu(cpu, &pd->cpumask);
> --
> 2.34.1
>