[GIT PULL] KVM cpu hotplug fixes

From: Avi Kivity
Date: Tue May 29 2007 - 09:20:54 EST


Linus,

Please pull from the repository and branch

git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git hotplug-linus

to receive a patchset which makes cpu hotplug (and therefore, suspend and
resume) more robust when running virtual machines. The core issue is that
we need a notification about a cpu going away at a point in time where it's
still alive, but not running any user processes. Such a notification does
not exist today, so the patchset adds it as a new CPU_DYING notification.

The patchset is against 2.6.22-rc3.

Shortlog:

Avi Kivity (7):
HOTPLUG: Add CPU_DYING notifier
HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING
HOTPLUG: Adapt thermal throttle to CPU_DYING
SMP: Implement on_cpu()
KVM: Keep track of which cpus have virtualization enabled
KVM: Tune hotplug/suspend IPIs
KVM: Use CPU_DYING for disabling virtualization

arch/i386/kernel/cpu/mcheck/therm_throt.c | 6 ++-
drivers/kvm/kvm_main.c | 50 +++++++++++++++++++++--------
include/linux/notifier.h | 3 ++
include/linux/smp.h | 16 +++++++++
kernel/cpu.c | 16 ++++++++-
kernel/cpuset.c | 3 ++
kernel/softirq.c | 24 ++++++++++++++
7 files changed, 100 insertions(+), 18 deletions(-)

And the patch in all its glory:

diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 7ba7c3a..1203dc5 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
int err;

sys_dev = get_cpu_sysdev(cpu);
- mutex_lock(&therm_cpu_lock);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
+ mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev);
+ mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ mutex_lock(&therm_cpu_lock);
thermal_throttle_remove_dev(sys_dev);
+ mutex_unlock(&therm_cpu_lock);
break;
}
- mutex_unlock(&therm_cpu_lock);
return NOTIFY_OK;
}

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index da985b3..1ad5ea1 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/sched.h>
+#include <linux/cpumask.h>

#include "x86_emulate.h"
#include "segment_descriptor.h"
@@ -51,8 +52,12 @@ MODULE_LICENSE("GPL");
static DEFINE_SPINLOCK(kvm_lock);
static LIST_HEAD(vm_list);

+static cpumask_t cpus_hardware_enabled;
+
struct kvm_arch_ops *kvm_arch_ops;

+static void hardware_disable(void *ignored);
+
#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)

static struct kvm_stats_debugfs_item {
@@ -2840,7 +2845,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
* in vmx root mode.
*/
printk(KERN_INFO "kvm: exiting hardware virtualization\n");
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ on_each_cpu(hardware_disable, NULL, 0, 1);
}
return NOTIFY_OK;
}
@@ -2883,28 +2888,46 @@ static void decache_vcpus_on_cpu(int cpu)
spin_unlock(&kvm_lock);
}

+static void hardware_enable(void *junk)
+{
+ int cpu = raw_smp_processor_id();
+
+ if (cpu_isset(cpu, cpus_hardware_enabled))
+ return;
+ cpu_set(cpu, cpus_hardware_enabled);
+ kvm_arch_ops->hardware_enable(NULL);
+}
+
+static void hardware_disable(void *junk)
+{
+ int cpu = raw_smp_processor_id();
+
+ if (!cpu_isset(cpu, cpus_hardware_enabled))
+ return;
+ cpu_clear(cpu, cpus_hardware_enabled);
+ decache_vcpus_on_cpu(cpu);
+ kvm_arch_ops->hardware_disable(NULL);
+}
+
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
void *v)
{
int cpu = (long)v;

switch (val) {
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
- decache_vcpus_on_cpu(cpu);
- smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
- NULL, 0, 1);
+ on_cpu(cpu, hardware_disable, NULL, 0, 1);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
- NULL, 0, 1);
+ on_cpu(cpu, hardware_enable, NULL, 0, 1);
break;
}
return NOTIFY_OK;
@@ -2961,14 +2984,13 @@ static void kvm_exit_debug(void)

static int kvm_suspend(struct sys_device *dev, pm_message_t state)
{
- decache_vcpus_on_cpu(raw_smp_processor_id());
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ hardware_disable(NULL);
return 0;
}

static int kvm_resume(struct sys_device *dev)
{
- on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+ hardware_enable(NULL);
return 0;
}

@@ -3021,7 +3043,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
if (r < 0)
goto out;

- on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+ on_each_cpu(hardware_enable, NULL, 0, 1);
r = register_cpu_notifier(&kvm_cpu_notifier);
if (r)
goto out_free_1;
@@ -3053,7 +3075,7 @@ out_free_2:
unregister_reboot_notifier(&kvm_reboot_notifier);
unregister_cpu_notifier(&kvm_cpu_notifier);
out_free_1:
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ on_each_cpu(hardware_disable, NULL, 0, 1);
kvm_arch_ops->hardware_unsetup();
out:
kvm_arch_ops = NULL;
@@ -3067,7 +3089,7 @@ void kvm_exit_arch(void)
sysdev_class_unregister(&kvm_sysdev_class);
unregister_reboot_notifier(&kvm_reboot_notifier);
unregister_cpu_notifier(&kvm_cpu_notifier);
- on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+ on_each_cpu(hardware_disable, NULL, 0, 1);
kvm_arch_ops->hardware_unsetup();
kvm_arch_ops = NULL;
}
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9431101..576f2bb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */
#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */
+#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task,
+ * not handling interrupts, soon dead */

/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
* operation in progress
@@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)

#endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 96ac21f..613edd2 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
*/

#include <linux/errno.h>
+#include <asm/system.h>

extern void cpu_idle(void);

@@ -61,6 +62,11 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
* Call a function on all processors
*/
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait);
+/*
+ * Call a function on one processor
+ */
+int on_cpu(int cpu, void (*func)(void *info), void *info,
+ int retry, int wait);

#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */
#define MSG_ALL 0x8001
@@ -96,6 +102,16 @@ static inline int up_smp_call_function(void)
local_irq_enable(); \
0; \
})
+
+static inline int on_cpu(int cpu, void (*func)(void *info), void *info,
+ int retry, int wait)
+{
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ return 0;
+}
+
static inline void smp_send_reschedule(int cpu) { }
#define num_booting_cpus() 1
#define smp_prepare_boot_cpu() do {} while (0)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 208cf34..181ae70 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
write_unlock_irq(&tasklist_lock);
}

+struct take_cpu_down_param {
+ unsigned long mod;
+ void *hcpu;
+};
+
/* Take this CPU down. */
-static int take_cpu_down(void *unused)
+static int take_cpu_down(void *_param)
{
+ struct take_cpu_down_param *param = _param;
int err;

+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
cpumask_t old_allowed, tmp;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
+ .mod = mod,
+ .hcpu = hcpu,
+ };

if (num_online_cpus() == 1)
return -EBUSY;
@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
set_cpus_allowed(current, tmp);

mutex_lock(&cpu_bitmask_lock);
- p = __stop_machine_run(take_cpu_down, NULL, cpu);
+ p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
mutex_unlock(&cpu_bitmask_lock);

if (IS_ERR(p) || cpu_online(cpu)) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f57854b..584953a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
static int cpuset_handle_cpuhp(struct notifier_block *nb,
unsigned long phase, void *cpu)
{
+ if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
+ return NOTIFY_DONE;
+
common_cpu_mem_hotplug_unplug();
return 0;
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0b9886a..11666f7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -658,4 +658,28 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
return ret;
}
EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Call a function on one processor, which might be the currently executing
+ * processor.
+ */
+int on_cpu(int cpu, void (*func) (void *info), void *info,
+ int retry, int wait)
+{
+ int ret;
+ int this_cpu;
+
+ this_cpu = get_cpu();
+ if (this_cpu == cpu) {
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ ret = 0;
+ } else
+ ret = smp_call_function_single(cpu, func, info, retry, wait);
+ put_cpu();
+ return ret;
+}
+EXPORT_SYMBOL(on_cpu);
+
#endif
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/