Re: [patch] paravirt: isolate module ops
From: Zachary Amsden
Date: Fri Jan 05 2007 - 20:03:35 EST
Ingo Molnar wrote:
Subject: [patch] paravirt: isolate module ops
From: Ingo Molnar <mingo@xxxxxxx>
only export those operations to modules that have been available to them
historically: irq disable/enable, io-delay, udelay, etc.
this isolates that functionality from other paravirtualization
functionality that modules have no business messing with.
boot and build tested with CONFIG_PARAVIRT=y.
I would suggest a slightly different carving. For one, no TLB flushes.
If you can't modify PTEs, why do you need to have TLB flushes? And I
would allow CR0 read / write for code which saves and restores FPU state
- possibly even debug register access, although any code which touches
DRs could be doing something sneaky. I'm on the fence about that one.
Here is a partially tested patch against the -mm tree. Let me know what
you think of this slightly different approach.
diff -r 3242f865ce8e arch/i386/kernel/paravirt.c
--- a/arch/i386/kernel/paravirt.c Thu Jan 04 20:17:02 2007 -0800
+++ b/arch/i386/kernel/paravirt.c Fri Jan 05 16:48:25 2007 -0800
@@ -492,6 +492,7 @@ struct paravirt_ops paravirt_ops = {
.patch = native_patch,
.banner = default_banner,
+
.arch_setup = native_nop,
.memory_setup = machine_specific_memory_setup,
.get_wallclock = native_get_wallclock,
@@ -577,4 +578,42 @@ struct paravirt_ops paravirt_ops = {
.startup_ipi_hook = (void *)native_nop,
};
-EXPORT_SYMBOL(paravirt_ops);
+
+/*
+ * These are exported to modules:
+ */
+struct paravirt_mod_ops paravirt_mod_ops = {
+ .name = "bare hardware",
+ .paravirt_enabled = 0,
+ .kernel_rpl = 0,
+
+ .patch = native_patch,
+ .banner = default_banner,
+
+ .save_fl = native_save_fl,
+ .restore_fl = native_restore_fl,
+ .irq_disable = native_irq_disable,
+ .irq_enable = native_irq_enable,
+
+ .cpuid = native_cpuid,
+
+ .read_msr = native_read_msr,
+ .write_msr = native_write_msr,
+
+ .read_tsc = native_read_tsc,
+ .read_pmc = native_read_pmc,
+
+ .io_delay = native_io_delay,
+ .const_udelay = __const_udelay,
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ .apic_write = native_apic_write,
+ .apic_write_atomic = native_apic_write_atomic,
+ .apic_read = native_apic_read,
+#endif
+
+ .flush_tlb_user = native_flush_tlb,
+ .flush_tlb_kernel = native_flush_tlb_global,
+ .flush_tlb_single = native_flush_tlb_single,
+};
+EXPORT_SYMBOL(paravirt_mod_ops);
diff -r 3242f865ce8e include/asm-i386/delay.h
--- a/include/asm-i386/delay.h Thu Jan 04 20:17:02 2007 -0800
+++ b/include/asm-i386/delay.h Fri Jan 05 16:11:43 2007 -0800
@@ -17,9 +17,9 @@ extern void __delay(unsigned long loops)
extern void __delay(unsigned long loops);
#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
-#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+#define udelay(n) paravirt_mod_ops.const_udelay((n) * 0x10c7ul)
-#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+#define ndelay(n) paravirt_mod_ops.const_udelay((n) * 5ul)
#else /* !PARAVIRT || USE_REAL_TIME_DELAY */
diff -r 3242f865ce8e include/asm-i386/paravirt.h
--- a/include/asm-i386/paravirt.h Thu Jan 04 20:17:02 2007 -0800
+++ b/include/asm-i386/paravirt.h Fri Jan 05 16:51:10 2007 -0800
@@ -29,12 +29,52 @@ struct Xgt_desc_struct;
struct Xgt_desc_struct;
struct tss_struct;
struct mm_struct;
-struct paravirt_ops
+struct paravirt_mod_ops
{
unsigned int kernel_rpl;
int paravirt_enabled;
const char *name;
+ /* All the function pointers here are declared as "fastcall"
+ so that we get a specific register-based calling
+ convention. This makes it easier to implement inline
+ assembler replacements. */
+
+ void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx);
+
+ /* CLTS / cr0 may be used for math state save / restore */
+ void (fastcall *clts)(void);
+ unsigned long (fastcall *read_cr0)(void);
+ void (fastcall *write_cr0)(unsigned long);
+
+ unsigned long (fastcall *save_fl)(void);
+ void (fastcall *restore_fl)(unsigned long);
+ void (fastcall *irq_disable)(void);
+ void (fastcall *irq_enable)(void);
+
+ /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
+ u64 (fastcall *read_msr)(unsigned int msr, int *err);
+ int (fastcall *write_msr)(unsigned int msr, u64 val);
+
+ u64 (fastcall *read_tsc)(void);
+ u64 (fastcall *read_pmc)(void);
+
+ void (fastcall *io_delay)(void);
+ void (*const_udelay)(unsigned long loops);
+
+ /* May be needed by device drivers to flush cache */
+ void (fastcall *wbinvd)(void);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ void (fastcall *apic_write)(unsigned long reg, unsigned long v);
+ void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
+ unsigned long (fastcall *apic_read)(unsigned long reg);
+#endif
+};
+
+struct paravirt_ops
+{
/*
* Patch may replace one of the defined code sequences with arbitrary
* code, subject to the same register constraints. This generally
@@ -54,21 +94,8 @@ struct paravirt_ops
int (*set_wallclock)(unsigned long);
void (*time_init)(void);
- /* All the function pointers here are declared as "fastcall"
- so that we get a specific register-based calling
- convention. This makes it easier to implement inline
- assembler replacements. */
-
- void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx);
-
unsigned long (fastcall *get_debugreg)(int regno);
void (fastcall *set_debugreg)(int regno, unsigned long value);
-
- void (fastcall *clts)(void);
-
- unsigned long (fastcall *read_cr0)(void);
- void (fastcall *write_cr0)(unsigned long);
unsigned long (fastcall *read_cr2)(void);
void (fastcall *write_cr2)(unsigned long);
@@ -80,20 +107,8 @@ struct paravirt_ops
unsigned long (fastcall *read_cr4)(void);
void (fastcall *write_cr4)(unsigned long);
- unsigned long (fastcall *save_fl)(void);
- void (fastcall *restore_fl)(unsigned long);
- void (fastcall *irq_disable)(void);
- void (fastcall *irq_enable)(void);
void (fastcall *safe_halt)(void);
void (fastcall *halt)(void);
- void (fastcall *wbinvd)(void);
-
- /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
- u64 (fastcall *read_msr)(unsigned int msr, int *err);
- int (fastcall *write_msr)(unsigned int msr, u64 val);
-
- u64 (fastcall *read_tsc)(void);
- u64 (fastcall *read_pmc)(void);
void (fastcall *load_tr_desc)(void);
void (fastcall *load_gdt)(const struct Xgt_desc_struct *);
@@ -114,13 +129,7 @@ struct paravirt_ops
void (fastcall *set_iopl_mask)(unsigned mask);
- void (fastcall *io_delay)(void);
- void (*const_udelay)(unsigned long loops);
-
#ifdef CONFIG_X86_LOCAL_APIC
- void (fastcall *apic_write)(unsigned long reg, unsigned long v);
- void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
- unsigned long (fastcall *apic_read)(unsigned long reg);
void (*setup_boot_clock)(void);
void (*setup_secondary_clock)(void);
#endif
@@ -163,8 +172,9 @@ struct paravirt_ops
__attribute__((__section__(".paravirtprobe"))) = fn
extern struct paravirt_ops paravirt_ops;
-
-#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
+extern struct paravirt_mod_ops paravirt_mod_ops;
+
+#define paravirt_enabled() (paravirt_mod_ops.paravirt_enabled)
static inline void load_esp0(struct tss_struct *tss,
struct thread_struct *thread)
@@ -192,7 +202,7 @@ static inline void __cpuid(unsigned int
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
- paravirt_ops.cpuid(eax, ebx, ecx, edx);
+ paravirt_mod_ops.cpuid(eax, ebx, ecx, edx);
}
/*
@@ -201,10 +211,10 @@ static inline void __cpuid(unsigned int
#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg)
#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val)
-#define clts() paravirt_ops.clts()
-
-#define read_cr0() paravirt_ops.read_cr0()
-#define write_cr0(x) paravirt_ops.write_cr0(x)
+#define clts() paravirt_mod_ops.clts()
+
+#define read_cr0() paravirt_mod_ops.read_cr0()
+#define write_cr0(x) paravirt_mod_ops.write_cr0(x)
#define read_cr2() paravirt_ops.read_cr2()
#define write_cr2(x) paravirt_ops.write_cr2(x)
@@ -225,58 +235,58 @@ static inline void halt(void)
{
paravirt_ops.safe_halt();
}
-#define wbinvd() paravirt_ops.wbinvd()
-
-#define get_kernel_rpl() (paravirt_ops.kernel_rpl)
+#define wbinvd() paravirt_mod_ops.wbinvd()
+
+#define get_kernel_rpl() (paravirt_mod_ops.kernel_rpl)
#define rdmsr(msr,val1,val2) do { \
int _err; \
- u64 _l = paravirt_ops.read_msr(msr,&_err); \
+ u64 _l = paravirt_mod_ops.read_msr(msr,&_err); \
val1 = (u32)_l; \
val2 = _l >> 32; \
} while(0)
#define wrmsr(msr,val1,val2) do { \
u64 _l = ((u64)(val2) << 32) | (val1); \
- paravirt_ops.write_msr((msr), _l); \
+ paravirt_mod_ops.write_msr((msr), _l); \
} while(0)
#define rdmsrl(msr,val) do { \
int _err; \
- val = paravirt_ops.read_msr((msr),&_err); \
-} while(0)
-
-#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+ val = paravirt_mod_ops.read_msr((msr),&_err); \
+} while(0)
+
+#define wrmsrl(msr,val) (paravirt_mod_ops.write_msr((msr),(val)))
#define wrmsr_safe(msr,a,b) ({ \
u64 _l = ((u64)(b) << 32) | (a); \
- paravirt_ops.write_msr((msr),_l); \
+ paravirt_mod_ops.write_msr((msr),_l); \
})
/* rdmsr with exception handling */
#define rdmsr_safe(msr,a,b) ({ \
int _err; \
- u64 _l = paravirt_ops.read_msr(msr,&_err); \
+ u64 _l = paravirt_mod_ops.read_msr(msr,&_err); \
(*a) = (u32)_l; \
(*b) = _l >> 32; \
_err; })
#define rdtsc(low,high) do { \
- u64 _l = paravirt_ops.read_tsc(); \
+ u64 _l = paravirt_mod_ops.read_tsc(); \
low = (u32)_l; \
high = _l >> 32; \
} while(0)
#define rdtscl(low) do { \
- u64 _l = paravirt_ops.read_tsc(); \
+ u64 _l = paravirt_mod_ops.read_tsc(); \
low = (int)_l; \
} while(0)
-#define rdtscll(val) (val = paravirt_ops.read_tsc())
+#define rdtscll(val) (val = paravirt_mod_ops.read_tsc())
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
#define rdpmc(counter,low,high) do { \
- u64 _l = paravirt_ops.read_pmc(); \
+ u64 _l = paravirt_mod_ops.read_pmc(); \
low = (u32)_l; \
high = _l >> 32; \
} while(0)
@@ -299,11 +309,11 @@ static inline void halt(void)
/* The paravirtualized I/O functions */
static inline void slow_down_io(void) {
- paravirt_ops.io_delay();
+ paravirt_mod_ops.io_delay();
#ifdef REALLY_SLOW_IO
- paravirt_ops.io_delay();
- paravirt_ops.io_delay();
- paravirt_ops.io_delay();
+ paravirt_mod_ops.io_delay();
+ paravirt_mod_ops.io_delay();
+ paravirt_mod_ops.io_delay();
#endif
}
@@ -313,17 +323,17 @@ static inline void slow_down_io(void) {
*/
static inline void apic_write(unsigned long reg, unsigned long v)
{
- paravirt_ops.apic_write(reg,v);
+ paravirt_mod_ops.apic_write(reg,v);
}
static inline void apic_write_atomic(unsigned long reg, unsigned long v)
{
- paravirt_ops.apic_write_atomic(reg,v);
+ paravirt_mod_ops.apic_write_atomic(reg,v);
}
static inline unsigned long apic_read(unsigned long reg)
{
- return paravirt_ops.apic_read(reg);
+ return paravirt_mod_ops.apic_read(reg);
}
static inline void setup_boot_clock(void)
@@ -447,7 +457,7 @@ static inline unsigned long __raw_local_
"call *%1;"
"popl %%edx; popl %%ecx",
PARAVIRT_SAVE_FLAGS, CLBR_NONE)
- : "=a"(f): "m"(paravirt_ops.save_fl)
+ : "=a"(f): "m"(paravirt_mod_ops.save_fl)
: "memory", "cc");
return f;
}
@@ -458,7 +468,7 @@ static inline void raw_local_irq_restore
"call *%1;"
"popl %%edx; popl %%ecx",
PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
- : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
+ : "=a"(f) : "m" (paravirt_mod_ops.restore_fl), "0"(f)
: "memory", "cc");
}
@@ -468,7 +478,7 @@ static inline void raw_local_irq_disable
"call *%0;"
"popl %%edx; popl %%ecx",
PARAVIRT_IRQ_DISABLE, CLBR_EAX)
- : : "m" (paravirt_ops.irq_disable)
+ : : "m" (paravirt_mod_ops.irq_disable)
: "memory", "eax", "cc");
}
@@ -478,7 +488,7 @@ static inline void raw_local_irq_enable(
"call *%0;"
"popl %%edx; popl %%ecx",
PARAVIRT_IRQ_ENABLE, CLBR_EAX)
- : : "m" (paravirt_ops.irq_enable)
+ : : "m" (paravirt_mod_ops.irq_enable)
: "memory", "eax", "cc");
}
@@ -493,26 +503,26 @@ static inline unsigned long __raw_local_
PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
CLBR_NONE)
: "=a"(f)
- : "m" (paravirt_ops.save_fl),
- "m" (paravirt_ops.irq_disable)
+ : "m" (paravirt_mod_ops.save_fl),
+ "m" (paravirt_mod_ops.irq_disable)
: "memory", "cc");
return f;
}
#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *paravirt_ops+%c[irq_disable];" \
+ "call *paravirt_mod_ops+%c[irq_disable];" \
"popl %%edx; popl %%ecx", \
PARAVIRT_IRQ_DISABLE, CLBR_EAX)
#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *paravirt_ops+%c[irq_enable];" \
+ "call *paravirt_mod_ops+%c[irq_enable];" \
"popl %%edx; popl %%ecx", \
PARAVIRT_IRQ_ENABLE, CLBR_EAX)
#define CLI_STI_CLOBBERS , "%eax"
#define CLI_STI_INPUT_ARGS \
, \
- [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \
- [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable))
+ [irq_disable] "i" (offsetof(struct paravirt_mod_ops, irq_disable)), \
+ [irq_enable] "i" (offsetof(struct paravirt_mod_ops, irq_enable))
#else /* __ASSEMBLY__ */
@@ -534,13 +544,13 @@ 772:; \
#define DISABLE_INTERRUPTS(clobbers) \
PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \
pushl %ecx; pushl %edx; \
- call *paravirt_ops+PARAVIRT_irq_disable; \
+ call *paravirt_mod_ops+PARAVIRT_irq_disable; \
popl %edx; popl %ecx) \
#define ENABLE_INTERRUPTS(clobbers) \
PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \
pushl %ecx; pushl %edx; \
- call *%cs:paravirt_ops+PARAVIRT_irq_enable; \
+ call *%cs:paravirt_mod_ops+PARAVIRT_irq_enable; \
popl %edx; popl %ecx)
#define ENABLE_INTERRUPTS_SYSEXIT \
@@ -548,7 +558,7 @@ 772:; \
jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
#define GET_CR0_INTO_EAX \
- call *paravirt_ops+PARAVIRT_read_cr0
+ call *paravirt_mod_ops+PARAVIRT_read_cr0
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
diff -r 3242f865ce8e arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c Thu Jan 04 20:17:02 2007 -0800
+++ b/arch/i386/kernel/asm-offsets.c Fri Jan 05 16:49:15 2007 -0800
@@ -104,11 +104,11 @@ void foo(void)
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
- OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
- OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
+ OFFSET(PARAVIRT_enabled, paravirt_mod_ops, paravirt_enabled);
+ OFFSET(PARAVIRT_irq_disable, paravirt_mod_ops, irq_disable);
+ OFFSET(PARAVIRT_irq_enable, paravirt_mod_ops, irq_enable);
OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
OFFSET(PARAVIRT_iret, paravirt_ops, iret);
- OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+ OFFSET(PARAVIRT_read_cr0, paravirt_mod_ops, read_cr0);
#endif
}