Re: [Xen-devel] [patch 30/44] xen: Add support for preemption

From: tgh
Date: Tue Oct 30 2007 - 05:23:14 EST


Hi,
I am using Xen, and I am curious whether Xen supports preemptive
scheduling of VMs.
Can a running VM be preempted by Xen in order to schedule another VM?

Thanks in advance


Jeremy Fitzhardinge 写道:
> Add Xen support for preemption. This is mostly a cleanup of existing
> preempt_enable/disable calls, or just comments to explain the current
> usage.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
> Signed-off-by: Chris Wright <chrisw@xxxxxxxxxxxx>
>
> ---
> arch/i386/xen/Kconfig | 2
> arch/i386/xen/enlighten.c | 98 ++++++++++++++++++++++++++------------------
> arch/i386/xen/mmu.c | 5 +-
> arch/i386/xen/multicalls.c | 11 ++--
> arch/i386/xen/time.c | 22 +++++++--
> 5 files changed, 86 insertions(+), 52 deletions(-)
>
> ===================================================================
> --- a/arch/i386/xen/Kconfig
> +++ b/arch/i386/xen/Kconfig
> @@ -4,7 +4,7 @@
>
> config XEN
> bool "Enable support for Xen hypervisor"
> - depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || NEED_MULTIPLE_NODES)
> + depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
> help
> This is the Linux Xen port. Enabling this will allow the
> kernel to boot in a paravirtualized environment under the
> ===================================================================
> --- a/arch/i386/xen/enlighten.c
> +++ b/arch/i386/xen/enlighten.c
> @@ -15,6 +15,7 @@
> #include <linux/init.h>
> #include <linux/smp.h>
> #include <linux/preempt.h>
> +#include <linux/hardirq.h>
> #include <linux/percpu.h>
> #include <linux/delay.h>
> #include <linux/start_kernel.h>
> @@ -108,11 +109,10 @@ static unsigned long xen_save_fl(void)
> struct vcpu_info *vcpu;
> unsigned long flags;
>
> - preempt_disable();
> vcpu = x86_read_percpu(xen_vcpu);
> +
> /* flag has opposite sense of mask */
> flags = !vcpu->evtchn_upcall_mask;
> - preempt_enable();
>
> /* convert to IF type flag
> -0 -> 0x00000000
> @@ -125,51 +125,56 @@ static void xen_restore_fl(unsigned long
> {
> struct vcpu_info *vcpu;
>
> - preempt_disable();
> -
> /* convert from IF type flag */
> flags = !(flags & X86_EFLAGS_IF);
> +
> + /* There's a one instruction preempt window here. We need to
> + make sure we're don't switch CPUs between getting the vcpu
> + pointer and updating the mask. */
> + preempt_disable();
> vcpu = x86_read_percpu(xen_vcpu);
> vcpu->evtchn_upcall_mask = flags;
> + preempt_enable_no_resched();
> +
> + /* Doesn't matter if we get preempted here, because any
> + pending event will get dealt with anyway. */
>
> if (flags == 0) {
> - /* Unmask then check (avoid races). We're only protecting
> - against updates by this CPU, so there's no need for
> - anything stronger. */
> - barrier();
> -
> + preempt_check_resched();
> + barrier(); /* unmask then check (avoid races) */
> if (unlikely(vcpu->evtchn_upcall_pending))
> force_evtchn_callback();
> - preempt_enable();
> - } else
> - preempt_enable_no_resched();
> + }
> }
>
> static void xen_irq_disable(void)
> {
> + /* There's a one instruction preempt window here. We need to
> + make sure we're don't switch CPUs between getting the vcpu
> + pointer and updating the mask. */
> + preempt_disable();
> + x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
> + preempt_enable_no_resched();
> +}
> +
> +static void xen_irq_enable(void)
> +{
> struct vcpu_info *vcpu;
> - preempt_disable();
> - vcpu = x86_read_percpu(xen_vcpu);
> - vcpu->evtchn_upcall_mask = 1;
> - preempt_enable_no_resched();
> -}
> -
> -static void xen_irq_enable(void)
> -{
> - struct vcpu_info *vcpu;
> -
> +
> + /* There's a one instruction preempt window here. We need to
> + make sure we're don't switch CPUs between getting the vcpu
> + pointer and updating the mask. */
> preempt_disable();
> vcpu = x86_read_percpu(xen_vcpu);
> vcpu->evtchn_upcall_mask = 0;
> -
> - /* Unmask then check (avoid races). We're only protecting
> - against updates by this CPU, so there's no need for
> - anything stronger. */
> - barrier();
> -
> + preempt_enable_no_resched();
> +
> + /* Doesn't matter if we get preempted here, because any
> + pending event will get dealt with anyway. */
> +
> + barrier(); /* unmask then check (avoid races) */
> if (unlikely(vcpu->evtchn_upcall_pending))
> force_evtchn_callback();
> - preempt_enable();
> }
>
> static void xen_safe_halt(void)
> @@ -189,6 +194,8 @@ static void xen_halt(void)
>
> static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
> {
> + BUG_ON(preemptible());
> +
> switch (mode) {
> case PARAVIRT_LAZY_NONE:
> BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
> @@ -293,9 +300,13 @@ static void xen_write_ldt_entry(struct d
> xmaddr_t mach_lp = virt_to_machine(lp);
> u64 entry = (u64)high << 32 | low;
>
> + preempt_disable();
> +
> xen_mc_flush();
> if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
> BUG();
> +
> + preempt_enable();
> }
>
> static int cvt_gate_to_trap(int vector, u32 low, u32 high,
> @@ -328,11 +339,13 @@ static void xen_write_idt_entry(struct d
> static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
> u32 low, u32 high)
> {
> -
> - int cpu = smp_processor_id();
> unsigned long p = (unsigned long)&dt[entrynum];
> - unsigned long start = per_cpu(idt_desc, cpu).address;
> - unsigned long end = start + per_cpu(idt_desc, cpu).size + 1;
> + unsigned long start, end;
> +
> + preempt_disable();
> +
> + start = __get_cpu_var(idt_desc).address;
> + end = start + __get_cpu_var(idt_desc).size + 1;
>
> xen_mc_flush();
>
> @@ -347,6 +360,8 @@ static void xen_write_idt_entry(struct d
> if (HYPERVISOR_set_trap_table(info))
> BUG();
> }
> +
> + preempt_enable();
> }
>
> static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
> @@ -368,11 +383,9 @@ static void xen_convert_trap_info(const
>
> void xen_copy_trap_info(struct trap_info *traps)
> {
> - const struct Xgt_desc_struct *desc = &get_cpu_var(idt_desc);
> + const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
>
> xen_convert_trap_info(desc, traps);
> -
> - put_cpu_var(idt_desc);
> }
>
> /* Load a new IDT into Xen. In principle this can be per-CPU, so we
> @@ -382,11 +395,10 @@ static void xen_load_idt(const struct Xg
> {
> static DEFINE_SPINLOCK(lock);
> static struct trap_info traps[257];
> - int cpu = smp_processor_id();
> -
> - per_cpu(idt_desc, cpu) = *desc;
>
> spin_lock(&lock);
> +
> + __get_cpu_var(idt_desc) = *desc;
>
> xen_convert_trap_info(desc, traps);
>
> @@ -402,6 +414,8 @@ static void xen_write_gdt_entry(struct d
> static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
> u32 low, u32 high)
> {
> + preempt_disable();
> +
> switch ((high >> 8) & 0xff) {
> case DESCTYPE_LDT:
> case DESCTYPE_TSS:
> @@ -418,10 +432,12 @@ static void xen_write_gdt_entry(struct d
> }
>
> }
> +
> + preempt_enable();
> }
>
> static void xen_load_esp0(struct tss_struct *tss,
> - struct thread_struct *thread)
> + struct thread_struct *thread)
> {
> struct multicall_space mcs = xen_mc_entry(0);
> MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
> @@ -525,6 +541,8 @@ static unsigned long xen_read_cr3(void)
>
> static void xen_write_cr3(unsigned long cr3)
> {
> + BUG_ON(preemptible());
> +
> if (cr3 == x86_read_percpu(xen_cr3)) {
> /* just a simple tlb flush */
> xen_flush_tlb();
> ===================================================================
> --- a/arch/i386/xen/mmu.c
> +++ b/arch/i386/xen/mmu.c
> @@ -38,6 +38,7 @@
> *
> * Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>, XenSource Inc, 2007
> */
> +#include <linux/sched.h>
> #include <linux/highmem.h>
> #include <linux/bug.h>
>
> @@ -530,5 +531,7 @@ void xen_exit_mmap(struct mm_struct *mm)
> drop_mm_ref(mm);
> put_cpu();
>
> + spin_lock(&mm->page_table_lock);
> xen_pgd_unpin(mm->pgd);
> -}
> + spin_unlock(&mm->page_table_lock);
> +}
> ===================================================================
> --- a/arch/i386/xen/multicalls.c
> +++ b/arch/i386/xen/multicalls.c
> @@ -20,6 +20,7 @@
> * Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>, XenSource Inc, 2007
> */
> #include <linux/percpu.h>
> +#include <linux/hardirq.h>
>
> #include <asm/xen/hypercall.h>
>
> @@ -39,9 +40,11 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq
>
> void xen_mc_flush(void)
> {
> - struct mc_buffer *b = &get_cpu_var(mc_buffer);
> + struct mc_buffer *b = &__get_cpu_var(mc_buffer);
> int ret = 0;
> unsigned long flags;
> +
> + BUG_ON(preemptible());
>
> /* Disable interrupts in case someone comes in and queues
> something in the middle */
> @@ -60,7 +63,6 @@ void xen_mc_flush(void)
> } else
> BUG_ON(b->argidx != 0);
>
> - put_cpu_var(mc_buffer);
> local_irq_restore(flags);
>
> BUG_ON(ret);
> @@ -68,10 +70,11 @@ void xen_mc_flush(void)
>
> struct multicall_space __xen_mc_entry(size_t args)
> {
> - struct mc_buffer *b = &get_cpu_var(mc_buffer);
> + struct mc_buffer *b = &__get_cpu_var(mc_buffer);
> struct multicall_space ret;
> unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
>
> + BUG_ON(preemptible());
> BUG_ON(argspace > MC_ARGS);
>
> if (b->mcidx == MC_BATCH ||
> @@ -83,7 +86,5 @@ struct multicall_space __xen_mc_entry(si
> ret.args = &b->args[b->argidx];
> b->argidx += argspace;
>
> - put_cpu_var(mc_buffer);
> -
> return ret;
> }
> ===================================================================
> --- a/arch/i386/xen/time.c
> +++ b/arch/i386/xen/time.c
> @@ -88,7 +88,7 @@ static void get_runstate_snapshot(struct
> u64 state_time;
> struct vcpu_runstate_info *state;
>
> - preempt_disable();
> + BUG_ON(preemptible());
>
> state = &__get_cpu_var(runstate);
>
> @@ -103,8 +103,6 @@ static void get_runstate_snapshot(struct
> *res = *state;
> barrier();
> } while (get64(&state->state_entry_time) != state_time);
> -
> - preempt_enable();
> }
>
> static void setup_runstate_info(int cpu)
> @@ -179,8 +177,18 @@ unsigned long long xen_sched_clock(void)
> unsigned long long xen_sched_clock(void)
> {
> struct vcpu_runstate_info state;
> - cycle_t now = xen_clocksource_read();
> + cycle_t now;
> + u64 ret;
> s64 offset;
> +
> + /*
> + * Ideally sched_clock should be called on a per-cpu basis
> + * anyway, so preempt should already be disabled, but that's
> + * not current practice at the moment.
> + */
> + preempt_disable();
> +
> + now = xen_clocksource_read();
>
> get_runstate_snapshot(&state);
>
> @@ -190,9 +198,13 @@ unsigned long long xen_sched_clock(void)
> if (offset < 0)
> offset = 0;
>
> - return state.time[RUNSTATE_blocked] +
> + ret = state.time[RUNSTATE_blocked] +
> state.time[RUNSTATE_running] +
> offset;
> +
> + preempt_enable();
> +
> + return ret;
> }
>
>
>
>

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/