[PATCH] idle using PNI monitor/mwait

From: Nakajima, Jun (jun.nakajima@intel.com)
Date: Tue Jul 08 2003 - 16:23:14 EST


Hi Linus,

Attached is a patch that enables the PNI (Prescott New Instructions)
MONITOR/MWAIT instructions in the kernel idle loop (the opcodes are now
public). Basically, MWAIT is similar to hlt, but no IPI is needed to wake
up the waiting processor: a write (by another processor) to the address
range specified by MONITOR wakes up the processor waiting on MWAIT.

Please apply.

Thanks,
Jun

----------------
diff -ur /build/orig/linux-2.5.74/arch/i386/kernel/cpu/intel.c
linux-2.5.74/arch/i386/kernel/cpu/intel.c
--- /build/orig/linux-2.5.74/arch/i386/kernel/cpu/intel.c
2003-07-02 13:43:55.000000000 -0700
+++ linux-2.5.74/arch/i386/kernel/cpu/intel.c 2003-07-08
09:18:28.000000000 -0700
@@ -13,6 +13,7 @@
 
 static int disable_P4_HT __initdata = 0;
 extern int trap_init_f00f_bug(void);
+extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 #ifdef CONFIG_X86_INTEL_USERCOPY
 /*
@@ -172,7 +173,7 @@
         }
 #endif
 
-
+ select_idle_routine(c);
         if (c->cpuid_level > 1) {
                 /* supports eax=2 call */
                 int i, j, n;
diff -ur /build/orig/linux-2.5.74/arch/i386/kernel/process.c
linux-2.5.74/arch/i386/kernel/process.c
--- /build/orig/linux-2.5.74/arch/i386/kernel/process.c 2003-07-02
13:38:40.000000000 -0700
+++ linux-2.5.74/arch/i386/kernel/process.c 2003-07-08
11:52:42.000000000 -0700
@@ -148,11 +148,56 @@
         }
 }
 
+/*
+ * Idle routine built on the MONITOR/MWAIT instructions of P4 processors
+ * with PNI, which can obviate the IPI otherwise needed to make an idle
+ * CPU check need_resched.  MONITOR is armed on the thread_info flags word
+ * that carries need_resched and MWAIT then enters an optimized wait state.
+ * Whenever someone changes need_resched we are woken up from MWAIT.
+ */
+static void mwait_idle(void)
+{
+	local_irq_enable();
+
+	if (!need_resched()) {
+		set_thread_flag(TIF_POLLING_NRFLAG);
+		do {
+			__monitor((void *)&current_thread_info()->flags, 0, 0);
+			if (need_resched())	/* re-check once MONITOR is armed */
+				break;
+			__mwait(0, 0);
+		} while (!need_resched());
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+	}
+}
+
+void __init select_idle_routine(const struct cpuinfo_x86 *c)
+{
+	/*
+	 * Choose the idle routine in view of CPU @c.  pm_idle is a single
+	 * pointer, so mwait_idle is installed only while nothing else has
+	 * claimed it, and it must not stay installed once a CPU without
+	 * MONITOR/MWAIT shows up (systems with asymmetric CPUs).
+	 */
+	if (cpu_has(c, X86_FEATURE_MWAIT)) {
+		printk("Monitor/Mwait feature present.\n");
+		if (!pm_idle)
+			pm_idle = mwait_idle;
+		return;
+	}
+	/* Pin an empty slot too, but keep an explicit idle= choice (e.g. poll). */
+	if (!pm_idle || pm_idle == mwait_idle)
+		pm_idle = default_idle;
+}
+
 static int __init idle_setup (char *str)
 {
         if (!strncmp(str, "poll", 4)) {
                 printk("using polling idle threads.\n");
                 pm_idle = poll_idle;
+ } else if (!strncmp(str, "halt", 4)) {
+ printk("using halt in idle threads.\n");
+ pm_idle = default_idle;
         }
 
         return 1;
diff -ur /build/orig/linux-2.5.74/include/asm-i386/cpufeature.h
linux-2.5.74/include/asm-i386/cpufeature.h
--- /build/orig/linux-2.5.74/include/asm-i386/cpufeature.h
2003-07-02 13:51:50.000000000 -0700
+++ linux-2.5.74/include/asm-i386/cpufeature.h 2003-07-08
09:18:28.000000000 -0700
@@ -71,6 +71,8 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep
*/
+#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
+
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word
5 */
 #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore
insn) */
diff -ur /build/orig/linux-2.5.74/include/asm-i386/processor.h
linux-2.5.74/include/asm-i386/processor.h
--- /build/orig/linux-2.5.74/include/asm-i386/processor.h
2003-07-02 13:40:24.000000000 -0700
+++ linux-2.5.74/include/asm-i386/processor.h 2003-07-08
09:18:28.000000000 -0700
@@ -272,6 +272,22 @@
 #define pc98 0
 #endif
 
+/* Issue MONITOR, encoded as raw bytes, with eax/ecx/edx as its operands. */
+static __inline__ void __monitor(const void *eax, unsigned long ecx,
+				 unsigned long edx)
+{
+	__asm__ __volatile__(".byte 0x0f,0x01,0xc8;" /* monitor %eax,%ecx,%edx */
+			     : /* no outputs */
+			     : "a" (eax), "c" (ecx), "d" (edx));
+}
+
+/* Issue MWAIT, encoded as raw bytes, with eax/ecx as its operands. */
+static __inline__ void __mwait(unsigned long eax, unsigned long ecx)
+{
+	__asm__ __volatile__(".byte 0x0f,0x01,0xc9;" /* mwait %eax,%ecx */
+			     : /* no outputs */
+			     : "a" (eax), "c" (ecx));
+}
 
 /* from system description table in BIOS. Mostly for MCA use, but
 others may find it useful. */



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Jul 15 2003 - 22:00:29 EST