Re: [patch 5/5] x86: mce: Add cmci poll mode

From: Borislav Petkov
Date: Thu Jun 07 2012 - 14:14:04 EST


On Wed, Jun 06, 2012 at 09:53:24PM +0000, Thomas Gleixner wrote:

[ ... ]

> Index: tip/arch/x86/kernel/cpu/mcheck/mce_intel.c
> ===================================================================
> --- tip.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c
> +++ tip/arch/x86/kernel/cpu/mcheck/mce_intel.c
> @@ -15,6 +15,8 @@
> #include <asm/msr.h>
> #include <asm/mce.h>
>
> +#include "mce-internal.h"
> +
> /*
> * Support for Intel Correct Machine Check Interrupts. This allows
> * the CPU to raise an interrupt when a corrected machine check happened.
> @@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_b
> */
> static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
>
> -#define CMCI_THRESHOLD 1
> +#define CMCI_THRESHOLD 1
> +#define CMCI_POLL_INTERVAL (30 * HZ)
> +#define CMCI_STORM_INTERVAL (1 * HZ)
> +#define CMCI_STORM_TRESHOLD 5

Just a spelling correction:

CMCI_STORM_THRESHOLD

> +
> +static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
> +static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
> +static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
> +
> +enum {
> + CMCI_STORM_NONE,
> + CMCI_STORM_ACTIVE,
> + CMCI_STORM_SUBSIDED,
> +};
> +
> +static atomic_t cmci_storm_on_cpus;
>
> static int cmci_supported(int *banks)
> {
> @@ -53,6 +70,73 @@ static int cmci_supported(int *banks)
> return !!(cap & MCG_CMCI_P);
> }
>
> +void mce_intel_cmci_poll(void)
> +{
> + if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
> + return;
> + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
> +}
> +
> +unsigned long mce_intel_adjust_timer(unsigned long interval)
> +{
> + if (interval < CMCI_POLL_INTERVAL)
> + return interval;
> +
> + switch (__this_cpu_read(cmci_storm_state)) {
> + case CMCI_STORM_ACTIVE:
> + /*
> + * We switch back to interrupt mode once the poll timer has
> + * silenced itself. That means no events recorded and the
> + * timer interval is back to our poll interval.
> + */
> + __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
> + atomic_dec(&cmci_storm_on_cpus);
> +
> + case CMCI_STORM_SUBSIDED:
> + /*
> + * We wait for all cpus to go back to SUBSIDED
> + * state. When that happens we switch back to
> + * interrupt mode.
> + */
> + if (!atomic_read(&cmci_storm_on_cpus)) {
> + __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
> + cmci_reenable();
> + cmci_recheck();
> + }
> + return CMCI_POLL_INTERVAL;
> + default:
> + /*
> + * We have shiny wheather, let the poll do whatever it
> + * thinks.
> + */
> + return interval;
> + }
> +}
> +
> +static bool cmci_storm_detect(void)
> +{
> + unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
> + unsigned long ts = __this_cpu_read(cmci_time_stamp);
> + unsigned long now = jiffies;
> +
> + if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
> + cnt++;
> + } else {
> + cnt = 1;
> + __this_cpu_write(cmci_time_stamp, now);
> + }
> + __this_cpu_write(cmci_storm_cnt, cnt);
> +
> + if (cnt <= CMCI_STORM_TRESHOLD)

and here too.

> + return false;
> +
> + cmci_clear();
> + __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
> + atomic_inc(&cmci_storm_on_cpus);
> + mce_timer_kick(CMCI_POLL_INTERVAL);
> + return true;
> +}
> +
> /*
> * The interrupt handler. This is called on every event.
> * Just call the poller directly to log any events.
> @@ -61,6 +145,8 @@ static int cmci_supported(int *banks)
> */
> static void intel_threshold_interrupt(void)
> {
> + if (cmci_storm_detect())
> + return;
> machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
> mce_notify_irq();
> }
>
>
>

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
GM: Alberto Bozzo
Reg: Dornach, Landkreis Muenchen
HRB Nr. 43632 WEEE Registernr: 129 19551

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/