Re: nmi_watchdog=2 regression in 2.6.21

From: Stephane Eranian
Date: Fri Aug 31 2007 - 12:22:22 EST


Daniel,

On Fri, Aug 31, 2007 at 07:43:20AM -0700, Daniel Walker wrote:
> On Thu, 2007-08-30 at 14:05 -0700, Stephane Eranian wrote:
> > Daniel,
>
> > Yes, I realized I missed a small detail in the switch statement.
> > Could you try the new version?
>
> This patch still has the stuck NMI .. Essentially the same thing that
> happened without the patch..
>
Ok, looks like defaulting to P6 does not quite work.

Here is a new version. This time I used a different approach.
I must admit I am a bit puzzled by the duplication of information
between the wd_ops and the nmi_watchdog_ctlblk structure. My understanding
is that the latter is used as a cache for the info that needs to be per-cpu.

The wd_ops provides the MSR to use for the counter, yet all the setup_*()
routines hardcode the MSR. Not sure why?

In this patch, the setup_*() routines now extract the MSRs from the wd_ops
and copy them into the nmi_watchdog_ctlblk. This is not done for P4 because
of the special and ugly case of HT.

With this approach, we can now create a custom wd_ops for CoreDuo that is
a clone of the intel_arch_wd_ops, except for the MSR.

Could you try this one instead?

Thanks.

--

-Stephane
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
index 9b5d6af..f9066e1 100644
--- a/arch/i386/kernel/cpu/perfctr-watchdog.c
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -271,8 +271,8 @@ static int setup_k7_watchdog(unsigned nmi_hz)
unsigned int evntsel;
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

- perfctr_msr = MSR_K7_PERFCTR0;
- evntsel_msr = MSR_K7_EVNTSEL0;
+ perfctr_msr = wd_ops->perfctr;
+ evntsel_msr = wd_ops->evntsel;

wrmsrl(perfctr_msr, 0UL);

@@ -351,8 +351,8 @@ static int setup_p6_watchdog(unsigned nmi_hz)
unsigned int evntsel;
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

- perfctr_msr = MSR_P6_PERFCTR0;
- evntsel_msr = MSR_P6_EVNTSEL0;
+ perfctr_msr = wd_ops->perfctr;
+ evntsel_msr = wd_ops->evntsel;

/* KVM doesn't implement this MSR */
if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
@@ -577,8 +577,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
return 0;

- perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
- evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1;
+ perfctr_msr = wd_ops->perfctr;
+ evntsel_msr = wd_ops->evntsel;

wrmsrl(perfctr_msr, 0UL);

@@ -613,6 +613,16 @@ static struct wd_ops intel_arch_wd_ops = {
.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};

+static struct wd_ops coreduo_wd_ops = {
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_intel_arch_watchdog,
+ .rearm = p6_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
+};
+
static void probe_nmi_watchdog(void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -623,6 +633,10 @@ static void probe_nmi_watchdog(void)
wd_ops = &k7_wd_ops;
break;
case X86_VENDOR_INTEL:
+ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
+ wd_ops = &coreduo_wd_ops;
+ break;
+ }
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
wd_ops = &intel_arch_wd_ops;
break;