[GIT PULL] x86/timers changes for v4.9

From: Ingo Molnar
Date: Mon Oct 03 2016 - 06:20:30 EST


Linus,

Please pull the latest x86-timers-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-timers-for-linus

# HEAD: 6baf3d61821f5b38f27b4e9f044ad4d1e8f3d14f x86/tsc: Add additional Intel CPU models to the crystal quirk list

This tree includes a HPET overhead micro-optimization plus new TSC frequencies for
newer Intel CPUs.

Thanks,

Ingo

------------------>
Prarit Bhargava (2):
x86/tsc: Use cpu id defines instead of hex constants
x86/tsc: Add additional Intel CPU models to the crystal quirk list

Waiman Long (1):
x86/hpet: Reduce HPET counter read contention


arch/x86/kernel/hpet.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/tsc.c | 12 +++++--
2 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c6dfd801df97..274fab99169d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -756,10 +756,104 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
/*
* Clock source related code
*/
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
+/*
+ * Reading the HPET counter is a very slow operation. If a large number of
+ * CPUs are trying to access the HPET counter simultaneously, it can cause
+ * massive delay and slow down system performance dramatically. This may
+ * happen when HPET is the default clock source instead of TSC. For a
+ * really large system with hundreds of CPUs, the slowdown may be so
+ * severe that it may actually crash the system because of a NMI watchdog
+ * soft lockup, for example.
+ *
+ * If multiple CPUs are trying to access the HPET counter at the same time,
+ * we don't actually need to read the counter multiple times. Instead, the
+ * other CPUs can use the counter value read by the first CPU in the group.
+ *
+ * This special feature is only enabled on x86-64 systems. It is unlikely
+ * that 32-bit x86 systems will have enough CPUs to require this feature
+ * with its associated locking overhead. And we also need 64-bit atomic
+ * read.
+ *
+ * The lock and the hpet value are stored together and can be read in a
+ * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
+ * is 32 bits in size.
+ */
+union hpet_lock {
+ struct {
+ arch_spinlock_t lock;
+ u32 value;
+ };
+ u64 lockval;
+};
+
+static union hpet_lock hpet __cacheline_aligned = {
+ { .lock = __ARCH_SPIN_LOCK_UNLOCKED, },
+};
+
+static cycle_t read_hpet(struct clocksource *cs)
+{
+ unsigned long flags;
+ union hpet_lock old, new;
+
+ BUILD_BUG_ON(sizeof(union hpet_lock) != 8);
+
+ /*
+ * Read HPET directly if in NMI.
+ */
+ if (in_nmi())
+ return (cycle_t)hpet_readl(HPET_COUNTER);
+
+ /*
+ * Read the current state of the lock and HPET value atomically.
+ */
+ old.lockval = READ_ONCE(hpet.lockval);
+
+ if (arch_spin_is_locked(&old.lock))
+ goto contended;
+
+ local_irq_save(flags);
+ if (arch_spin_trylock(&hpet.lock)) {
+ new.value = hpet_readl(HPET_COUNTER);
+ /*
+ * Use WRITE_ONCE() to prevent store tearing.
+ */
+ WRITE_ONCE(hpet.value, new.value);
+ arch_spin_unlock(&hpet.lock);
+ local_irq_restore(flags);
+ return (cycle_t)new.value;
+ }
+ local_irq_restore(flags);
+
+contended:
+ /*
+ * Contended case
+ * --------------
+ * Wait until the HPET value change or the lock is free to indicate
+ * its value is up-to-date.
+ *
+ * It is possible that old.value has already contained the latest
+ * HPET value while the lock holder was in the process of releasing
+ * the lock. Checking for lock state change will enable us to return
+ * the value immediately instead of waiting for the next HPET reader
+ * to come along.
+ */
+ do {
+ cpu_relax();
+ new.lockval = READ_ONCE(hpet.lockval);
+ } while ((new.value == old.value) && arch_spin_is_locked(&new.lock));
+
+ return (cycle_t)new.value;
+}
+#else
+/*
+ * For UP or 32-bit.
+ */
static cycle_t read_hpet(struct clocksource *cs)
{
return (cycle_t)hpet_readl(HPET_COUNTER);
}
+#endif

static struct clocksource clocksource_hpet = {
.name = "hpet",
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 78b9cb5a26af..46b2f41f8b05 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -23,6 +23,7 @@
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
+#include <asm/intel-family.h>

unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -686,11 +687,16 @@ unsigned long native_calibrate_tsc(void)

if (crystal_khz == 0) {
switch (boot_cpu_data.x86_model) {
- case 0x4E: /* SKL */
- case 0x5E: /* SKL */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case 0x5C: /* BXT */
+ case INTEL_FAM6_SKYLAKE_X:
+ crystal_khz = 25000; /* 25.0 MHz */
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT:
crystal_khz = 19200; /* 19.2 MHz */
break;
}