On Fri, 8 Apr 2016, Waiman Long wrote:
This patch attempts to reduce HPET read contention by using the factThat has nothing to do with tasks. clocksource reads can happen from almost
that if more than one task are trying to access HPET at the same time,
it will be more efficient if one task in the group reads the HPET
counter and shares it with the rest of the group instead of each
group member reads the HPET counter individually.
any context. The problem is concurrent access on multiple cpus.
This optimization is enabled on systems with more than 32 CPUs. It canPlease not. What's wrong with enabling it unconditionally?
also be explicitly enabled or disabled by using the new opt_read_hpet
kernel parameter.
+/*This wants to be conditional on CONFIG_SMP. No point in having all that muck
* Clock source related code
*/
static cycle_t read_hpet(struct clocksource *cs)
{
- return (cycle_t)hpet_readl(HPET_COUNTER);
+ int seq, cnt = 0;
+ u32 time;
+
+ if (opt_read_hpet<= 0)
+ return (cycle_t)hpet_readl(HPET_COUNTER);
around for an UP kernel.
+ seq = READ_ONCE(hpet_save.seq);This is voodoo programming and actively dangerous.
+ if (!HPET_SEQ_LOCKED(seq)) {
+ int old, new = seq + 1;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ /*
+ * Set the lock bit (lsb) to get the right to read HPET
+ * counter directly. If successful, read the counter, save
+ * its value, and increment the sequence number. Otherwise,
+ * increment the sequnce number to the expected locked value
+ * for comparison later on.
+ */
+ old = cmpxchg(&hpet_save.seq, seq, new);
+ if (old == seq) {
+ time = hpet_readl(HPET_COUNTER);
+ WRITE_ONCE(hpet_save.hpet, time);
+
+ /* Unlock */
+ smp_store_release(&hpet_save.seq, new + 1);
+ local_irq_restore(flags);
+ return (cycle_t)time;
+ }
+ local_irq_restore(flags);
+ seq = new;
+ }
+
+ /*
+ * Wait until the locked sequence number changes which indicates
+ * that the saved HPET value is up-to-date.
+ */
+ while (READ_ONCE(hpet_save.seq) == seq) {
+ /*
+ * Since reading the HPET is much slower than a single
+ * cpu_relax() instruction, we use two here in an attempt
+ * to reduce the amount of cacheline contention in the
+ * hpet_save.seq cacheline.
+ */
+ cpu_relax();
+ cpu_relax();
+
+ if (likely(++cnt<= HPET_RESET_THRESHOLD))
+ continue;
+
+ /*
+ * In the unlikely event that it takes too long for the lock
+ * holder to read the HPET, we do it ourselves and try to
+ * reset the lock. This will also break a deadlock if it
+ * happens, for example, when the process context lock holder
+ * gets killed in the middle of reading the HPET counter.
+ */
+ time = hpet_readl(HPET_COUNTER);
+ WRITE_ONCE(hpet_save.hpet, time);
+ if (READ_ONCE(hpet_save.seq) == seq) {
+ if (cmpxchg(&hpet_save.seq, seq, seq + 1) == seq)
+ pr_warn("read_hpet: reset hpet seq to 0x%x\n",
+ seq + 1);
CPU0 CPU1 CPU2
lock_hpet()
T1=read_hpet() wait_for_unlock()
store_hpet(T1)
....
T2 = read_hpet()
unlock_hpet()
lock_hpet()
T3 = read_hpet()
store_hpet(T3)
unlock_hpet()
return T3
lock_hpet()
T4 = read_hpet() wait_for_unlock()
store_hpet(T4)
store_hpet(T2)
unlock_hpet() return T2
CPU2 will observe time going backwards.
Thanks,
tglx