[PATCH] [15/26] x86: Don't use MWAIT on AMD Family 10

From: Andi Kleen
Date: Sun Apr 29 2007 - 19:51:01 EST



On AMD Family 10, MWAIT doesn't put the CPU into deeper sleep states, so it's
better to use the standard idle loop to save power. But allow re-enabling it
anyway (with idle=mwait) for benchmarking.

I also removed the obsolete idle=halt option on i386.

Cc: andreas.herrmann@xxxxxxx

Signed-off-by: Andi Kleen <ak@xxxxxxx>

---
Documentation/kernel-parameters.txt | 11 +++++++++--
arch/i386/kernel/cpu/amd.c | 5 +++++
arch/i386/kernel/process.c | 17 ++++++++---------
arch/x86_64/kernel/process.c | 12 +++++++-----
arch/x86_64/kernel/setup.c | 6 ++++++
include/asm-i386/processor.h | 2 ++
include/asm-x86_64/proto.h | 2 ++
7 files changed, 39 insertions(+), 16 deletions(-)

Index: linux/Documentation/kernel-parameters.txt
===================================================================
--- linux.orig/Documentation/kernel-parameters.txt
+++ linux/Documentation/kernel-parameters.txt
@@ -673,8 +673,15 @@ and is between 256 and 4096 characters.
idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
See Documentation/ide.txt.

- idle= [HW]
- Format: idle=poll or idle=halt
+ idle= [X86]
+ Format: idle=poll or idle=mwait
+ Poll forces a polling idle loop that can slightly improve the performance
+ of waking up an idle CPU, but will use a lot of power and make the system
+ run hot. Not recommended.
+ idle=mwait: On systems which support MONITOR/MWAIT but where the kernel
+ chose not to use it because it doesn't save as much power as a normal
+ idle loop, use the MONITOR/MWAIT idle loop anyway. Performance should be
+ the same as idle=poll.

ignore_loglevel [KNL]
Ignore loglevel setting - this will print /all/
Index: linux/arch/i386/kernel/cpu/amd.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/amd.c
+++ linux/arch/i386/kernel/cpu/amd.c
@@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_brok
return 0;
}

+int force_mwait __cpuinitdata;
+
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cp

if (amd_apic_timer_broken())
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+
+ if (c->x86 == 0x10 && !force_mwait)
+ clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
}

static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
Index: linux/arch/i386/kernel/process.c
===================================================================
--- linux.orig/arch/i386/kernel/process.c
+++ linux/arch/i386/kernel/process.c
@@ -272,25 +272,24 @@ void __devinit select_idle_routine(const
}
}

-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
{
- if (!strncmp(str, "poll", 4)) {
+ if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
#ifdef CONFIG_X86_SMP
if (smp_num_siblings > 1)
printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
#endif
- } else if (!strncmp(str, "halt", 4)) {
- printk("using halt in idle threads.\n");
- pm_idle = default_idle;
- }
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;

boot_option_idle_override = 1;
- return 1;
+ return 0;
}
-
-__setup("idle=", idle_setup);
+early_param("idle", idle_setup);

void show_regs(struct pt_regs * regs)
{
Index: linux/arch/x86_64/kernel/process.c
===================================================================
--- linux.orig/arch/x86_64/kernel/process.c
+++ linux/arch/x86_64/kernel/process.c
@@ -288,16 +288,18 @@ void __cpuinit select_idle_routine(const

static int __init idle_setup (char *str)
{
- if (!strncmp(str, "poll", 4)) {
+ if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
- }
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;

boot_option_idle_override = 1;
- return 1;
+ return 0;
}
-
-__setup("idle=", idle_setup);
+early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
Index: linux/arch/x86_64/kernel/setup.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup.c
+++ linux/arch/x86_64/kernel/setup.c
@@ -79,6 +79,8 @@ int bootloader_type;

unsigned long saved_video_mode;

+int force_mwait __cpuinitdata;
+
/*
* Early DMI memory
*/
@@ -604,6 +606,10 @@ static void __cpuinit init_amd(struct cp

/* RDTSC can be speculated around */
clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+
+ /* Family 10 doesn't support C states in MWAIT so don't use it */
+ if (c->x86 == 0x10 && !force_mwait)
+ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
}

static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
Index: linux/include/asm-i386/processor.h
===================================================================
--- linux.orig/include/asm-i386/processor.h
+++ linux/include/asm-i386/processor.h
@@ -779,4 +779,6 @@ extern int sysenter_setup(void);
extern void cpu_set_gdt(int);
extern void cpu_init(void);

+extern int force_mwait;
+
#endif /* __ASM_I386_PROCESSOR_H */
Index: linux/include/asm-x86_64/proto.h
===================================================================
--- linux.orig/include/asm-x86_64/proto.h
+++ linux/include/asm-x86_64/proto.h
@@ -119,6 +119,8 @@ extern int gsi_irq_sharing(int gsi);

extern void smp_local_timer_interrupt(void);

+extern int force_mwait;
+
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);

void i8254_timer_resume(void);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/