[PATCH 7/7] Xen: Implement CPU hotplugging

From: Alex Nixon
Date: Thu Aug 21 2008 - 14:07:30 EST


Note the changes from 2.6.18-xen CPU hotplugging:

A vcpu_down request from the remote admin via Xenbus both hot-unplugs the CPU and disables it, by removing it from the cpu_present map and removing its entry in /sys.

A vcpu_up request from the remote admin only re-enables the CPU; it does not immediately bring the CPU up. A udev event is emitted, which userspace can catch in order to automatically bring CPUs back up when they become available, or to implement a more complex policy.

Signed-off-by: Alex Nixon <alex.nixon@xxxxxxxxxx>
Cc: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
---
arch/x86/xen/enlighten.c | 7 +++
arch/x86/xen/irq.c | 2 +-
arch/x86/xen/smp.c | 52 +++++++++++++++++++++-----
arch/x86/xen/spinlock.c | 5 ++
arch/x86/xen/time.c | 8 ++++
arch/x86/xen/xen-ops.h | 6 +++
drivers/xen/Makefile | 2 +-
drivers/xen/cpu_hotplug.c | 90 +++++++++++++++++++++++++++++++++++++++++++++
drivers/xen/events.c | 4 ++
9 files changed, 164 insertions(+), 12 deletions(-)
create mode 100644 drivers/xen/cpu_hotplug.c

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c421049..204d64b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1342,6 +1342,12 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.set_fixmap = xen_set_fixmap,
};

+/*
+ * Xen implementations of the CPU-hotplug paravirt hooks.
+ *
+ * This table is only a template: xen_start_kernel() copies it by value
+ * into pv_hotplug_ops, so it is marked __initdata like the xen_mmu_ops
+ * template above.
+ */
+static const struct pv_hotplug_ops xen_hotplug_ops __initdata = {
+	.play_dead = xen_play_dead,
+	.cpu_disable = xen_cpu_disable,
+	.cpu_die = xen_cpu_die,
+};
+
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
@@ -1655,6 +1661,7 @@ asmlinkage void __init xen_start_kernel(void)
pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
+ pv_hotplug_ops = xen_hotplug_ops;

xen_init_irq_ops();

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 4e3f7f7..f33f75b 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -134,7 +134,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
- .wb_invd_halt = xen_wbinvd_halt,
+ .wbinvd_halt = xen_wbinvd_halt,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index baca7f2..682eaa4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -12,7 +12,6 @@
* result, all CPUs are treated as if they're single-core and
* single-threaded.
*
- * This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
#include <linux/err.h>
@@ -61,11 +60,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}

-static __cpuinit void cpu_bringup_and_idle(void)
+static __cpuinit void cpu_bringup(void)
{
int cpu = smp_processor_id();

cpu_init();
+ touch_softlockup_watchdog();
preempt_disable();

xen_enable_sysenter();
@@ -86,6 +86,11 @@ static __cpuinit void cpu_bringup_and_idle(void)
local_irq_enable();

wmb(); /* make sure everything is out */
+}
+
+static __cpuinit void cpu_bringup_and_idle(void)
+{
+ cpu_bringup();
cpu_idle();
}

@@ -209,8 +214,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)

cpu_set(cpu, cpu_present_map);
}
-
- //init_xenbus_allowed_cpumask();
}

static __cpuinit int
@@ -278,12 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
struct task_struct *idle = idle_task(cpu);
int rc;

-#if 0
- rc = cpu_up_check(cpu);
- if (rc)
- return rc;
-#endif
-
#ifdef CONFIG_X86_64
/* Allocate node local memory for AP pdas */
WARN_ON(cpu == 0);
@@ -336,6 +333,41 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
{
}

+/*
+ * pv_hotplug_ops.cpu_disable hook: take the calling CPU out of service.
+ *
+ * Returns -EBUSY when called on CPU 0, which this function refuses to
+ * disable, and 0 otherwise.
+ */
+int xen_cpu_disable(void)
+{
+	if (smp_processor_id() == 0)
+		return -EBUSY;
+
+	cpu_disable_common();
+
+	/* Switch this CPU over to the swapper_pg_dir page tables. */
+	load_cr3(swapper_pg_dir);
+
+	return 0;
+}
+
+/*
+ * pv_hotplug_ops.cpu_die hook: wait for @cpu to go down, then release
+ * the per-cpu interrupt bindings, lock state and timer it was using.
+ */
+void xen_cpu_die(unsigned int cpu)
+{
+	/*
+	 * Sleep in HZ/10 steps for as long as VCPUOP_is_up keeps
+	 * returning non-zero for this vcpu.
+	 */
+	for (;;) {
+		if (!HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL))
+			break;
+
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ/10);
+	}
+
+	/* Drop the per-cpu interrupt bindings of the dead CPU. */
+	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
+	xen_uninit_lock_cpu(cpu);
+	xen_teardown_timer(cpu);
+
+	/* Exactly one CPU is left online: call alternatives_smp_switch(0). */
+	if (num_online_cpus() == 1)
+		alternatives_smp_switch(0);
+}
+
+/*
+ * pv_hotplug_ops.play_dead hook.
+ *
+ * NOTE(review): cpu_bringup() is only reached if native_play_dead()
+ * returns; presumably that happens when the vcpu is brought up again --
+ * confirm against native_play_dead().
+ */
+void xen_play_dead(void)
+{
+	native_play_dead();
+
+	cpu_bringup();
+}
+
static void stop_self(void *v)
{
int cpu = smp_processor_id();
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index bfb1707..74a5114 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -173,6 +173,11 @@ void __cpuinit xen_init_lock_cpu(int cpu)
printk("cpu %d spinlock event irq %d\n", cpu, irq);
}

+/* Release the lock_kicker_irq binding that belongs to @cpu. */
+void xen_uninit_lock_cpu(int cpu)
+{
+	int irq = per_cpu(lock_kicker_irq, cpu);
+
+	unbind_from_irqhandler(irq, NULL);
+}
+
void __init xen_init_spinlocks(void)
{
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 685b774..8034d69 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -452,6 +452,14 @@ void xen_setup_timer(int cpu)
setup_runstate_info(cpu);
}

+/*
+ * Unbind the interrupt recorded in the xen_clock_events entry of @cpu.
+ * Must not be called for CPU 0 (BUG otherwise).
+ */
+void xen_teardown_timer(int cpu)
+{
+	BUG_ON(cpu == 0);
+
+	unbind_from_irqhandler(per_cpu(xen_clock_events, cpu).irq, NULL);
+}
+
void xen_setup_cpu_clockevents(void)
{
BUG_ON(preemptible());
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3c70ebc..a16e5b5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -33,6 +33,7 @@ void __init xen_build_dynamic_phys_to_machine(void);

void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
+void xen_teardown_timer(int cpu);
void xen_setup_cpu_clockevents(void);
unsigned long xen_tsc_khz(void);
void __init xen_time_init(void);
@@ -48,11 +49,16 @@ void xen_mark_init_mm_pinned(void);

void __init xen_setup_vcpu_info_placement(void);

+void xen_play_dead(void);
+void xen_cpu_die(unsigned int cpu);
+int xen_cpu_disable(void);
+
#ifdef CONFIG_SMP
void xen_smp_init(void);

void __init xen_init_spinlocks(void);
__cpuinit void xen_init_lock_cpu(int cpu);
+void xen_uninit_lock_cpu(int cpu);

extern cpumask_t xen_cpu_initialized_map;
#else
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 363286c..f62d8df 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y += grant-table.o features.o events.o manage.o
+obj-y += grant-table.o features.o events.o manage.o cpu_hotplug.o
obj-y += xenbus/
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
new file mode 100644
index 0000000..f1727ce
--- /dev/null
+++ b/drivers/xen/cpu_hotplug.c
@@ -0,0 +1,90 @@
+#include <linux/notifier.h>
+
+#include <xen/xenbus.h>
+
+#include <asm-x86/xen/hypervisor.h>
+#include <asm/cpu.h>
+
+/*
+ * Make @cpu available again: register it with arch_register_cpu() if it
+ * is not yet in cpu_present_map, then mark it present.  The CPU is not
+ * brought online here.
+ */
+static void enable_hotplug_cpu(int cpu)
+{
+	int was_present = cpu_present(cpu);
+
+	if (!was_present)
+		arch_register_cpu(cpu);
+
+	cpu_set(cpu, cpu_present_map);
+}
+
+/*
+ * Make @cpu unavailable: unregister it with arch_unregister_cpu() if it
+ * is currently in cpu_present_map, then clear it from that map.
+ */
+static void disable_hotplug_cpu(int cpu)
+{
+	int was_present = cpu_present(cpu);
+
+	if (was_present)
+		arch_unregister_cpu(cpu);
+
+	cpu_clear(cpu, cpu_present_map);
+}
+
+/*
+ * Act on the current value of the xenstore node "cpu/<cpu>/availability".
+ *
+ * "online" makes the CPU present again (it is not brought up here);
+ * "offline" takes it down and removes it from the present map.  Any
+ * other value is logged and ignored, as is a failure to read the node.
+ */
+static void vcpu_hotplug(unsigned int cpu)
+{
+	int err;
+	char dir[32], state[32];
+
+	/*
+	 * The caller derives @cpu from a xenstore watch path, so range-check
+	 * it before using it as a bit index into the possible map.
+	 */
+	if (cpu >= NR_CPUS || !cpu_possible(cpu))
+		return;
+
+	snprintf(dir, sizeof(dir), "cpu/%u", cpu);
+
+	/* Bound the read to sizeof(state) - 1 so a long value cannot overflow. */
+	err = xenbus_scanf(XBT_NIL, dir, "availability", "%31s", state);
+	if (err != 1) {
+		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+		return;
+	}
+
+	if (strcmp(state, "online") == 0) {
+		enable_hotplug_cpu(cpu);
+	} else if (strcmp(state, "offline") == 0) {
+		/* Best effort: the cpu_down() result is deliberately ignored. */
+		(void)cpu_down(cpu);
+		disable_hotplug_cpu(cpu);
+	} else {
+		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%u\n",
+		       state, cpu);
+	}
+}
+
+/*
+ * xenbus watch callback: extract the CPU number from the path of the
+ * node that fired and hand it to vcpu_hotplug().  Paths that do not
+ * contain a "cpu/<number>" component are ignored.
+ */
+static void handle_vcpu_hotplug_event(
+	struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+	unsigned int cpu;
+	const char *cpustr;
+	const char *node = vec[XS_WATCH_PATH];
+
+	cpustr = strstr(node, "cpu/");
+	if (cpustr == NULL)
+		return;
+
+	/* Only act on a successful parse; otherwise cpu is uninitialized. */
+	if (sscanf(cpustr, "cpu/%u", &cpu) != 1)
+		return;
+
+	vcpu_hotplug(cpu);
+}
+
+/*
+ * Notifier callback: registers a xenbus watch on the "cpu" node with
+ * handle_vcpu_hotplug_event() as its callback.  The result of the
+ * registration is ignored and NOTIFY_DONE is always returned.
+ */
+static int setup_cpu_watcher(struct notifier_block *notifier,
+			      unsigned long event, void *data)
+{
+	static struct xenbus_watch cpu_watch = {
+		.node		= "cpu",
+		.callback	= handle_vcpu_hotplug_event,
+	};
+
+	(void)register_xenbus_watch(&cpu_watch);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Initcall: when running on Xen, hook setup_cpu_watcher() up through
+ * register_xenstore_notifier().  Returns -ENODEV when not running on
+ * Xen, 0 otherwise.
+ */
+static int __init setup_vcpu_hotplug_event(void)
+{
+	static struct notifier_block xsn_cpu = {
+		.notifier_call = setup_cpu_watcher,
+	};
+
+	if (is_running_on_xen()) {
+		register_xenstore_notifier(&xsn_cpu);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 2a49ffc..63ca861 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -358,6 +358,10 @@ static void unbind_from_irq(unsigned int irq)
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
[index_from_irq(irq)] = -1;
break;
+ case IRQT_IPI:
+ per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
+ [index_from_irq(irq)] = -1;
+ break;
default:
break;
}
--
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/