Re: [PATCH v2] cpu/hotplug: wait for cpuset_hotplug_work to finish on cpu onlining

From: Daniel Jordan
Date: Fri Feb 12 2021 - 14:43:47 EST


Alexey Klimov <aklimov@xxxxxxxxxx> writes:
> int cpu_device_up(struct device *dev)

Yeah, definitely better to do the wait here.

> int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
> {
> - int cpu, ret = 0;
> + struct device *dev;
> + cpumask_var_t mask;
> + int cpu, ret;
> +
> + if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
> + return -ENOMEM;
>
> + ret = 0;
> cpu_maps_update_begin();
> for_each_online_cpu(cpu) {
> if (topology_is_primary_thread(cpu))
> @@ -2099,18 +2098,35 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
> * called under the sysfs hotplug lock, so it is properly
> * serialized against the regular offline usage.
> */
> - cpuhp_offline_cpu_device(cpu);
> + dev = get_cpu_device(cpu);
> + dev->offline = true;
> +
> + cpumask_set_cpu(cpu, mask);
> }
> if (!ret)
> cpu_smt_control = ctrlval;
> cpu_maps_update_done();
> +
> + /* Tell user space about the state changes */
> + for_each_cpu(cpu, mask) {
> + dev = get_cpu_device(cpu);
> + kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
> + }
> +
> + free_cpumask_var(mask);
> return ret;
> }

Hrm, should the dev manipulation be kept in one place, something like
this?

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8817ccdc8e112..aa21219a7b7c4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2085,11 +2085,20 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
if (ret)
break;
+
+ cpumask_set_cpu(cpu, mask);
+ }
+ if (!ret)
+ cpu_smt_control = ctrlval;
+ cpu_maps_update_done();
+
+ /* Tell user space about the state changes */
+ for_each_cpu(cpu, mask) {
/*
- * As this needs to hold the cpu maps lock it's impossible
+ * When the cpu maps lock was taken above it was impossible
* to call device_offline() because that ends up calling
* cpu_down() which takes cpu maps lock. cpu maps lock
- * needs to be held as this might race against in kernel
+ * needed to be held as this might race against in kernel
* abusers of the hotplug machinery (thermal management).
*
* So nothing would update device:offline state. That would
@@ -2100,16 +2109,6 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
*/
dev = get_cpu_device(cpu);
dev->offline = true;
-
- cpumask_set_cpu(cpu, mask);
- }
- if (!ret)
- cpu_smt_control = ctrlval;
- cpu_maps_update_done();
-
- /* Tell user space about the state changes */
- for_each_cpu(cpu, mask) {
- dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
}

@@ -2136,9 +2135,6 @@ int cpuhp_smt_enable(void)
ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
if (ret)
break;
- /* See comment in cpuhp_smt_disable() */
- dev = get_cpu_device(cpu);
- dev->offline = false;

cpumask_set_cpu(cpu, mask);
}
@@ -2152,7 +2148,9 @@ int cpuhp_smt_enable(void)

/* Tell user space about the state changes */
for_each_cpu(cpu, mask) {
+ /* See comment in cpuhp_smt_disable() */
dev = get_cpu_device(cpu);
+ dev->offline = false;
kobject_uevent(&dev->kobj, KOBJ_ONLINE);
}