[PATCH 7/7] locking/qspinlock: Collect queued unfair lock slowpath statistics

From: Waiman Long
Date: Sat Jul 11 2015 - 16:37:38 EST


This patch enables the accumulation of unfair qspinlock statistics
when the CONFIG_QUEUED_LOCK_STAT configuration parameter is set.

The accumulated lock statistics will be reported in debugfs under
the unfair-qspinlock directory.

On a KVM guest with 32 vCPUs, the statistics counts after bootup were:

lsteal_cnts = 172219 2377 425 118 33 8 5 12 14 0 0 0
trylock_cnt = 1495372

So most of the lock stealing happened in the initial trylock before
entering the queue. Once a vCPU is in the queue, its chance of getting
the lock drops off significantly the further it is from the queue head.

Signed-off-by: Waiman Long <Waiman.Long@xxxxxx>
---
arch/x86/Kconfig | 7 ++-
kernel/locking/qspinlock.c | 2 +-
kernel/locking/qspinlock_unfair.h | 89 +++++++++++++++++++++++++++++++++++++
3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 299a1c4..aee6236 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -680,11 +680,12 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.

config QUEUED_LOCK_STAT
- bool "Paravirt queued lock statistics"
- depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS
+ bool "Paravirt/Unfair queued lock statistics"
+ depends on DEBUG_FS && QUEUED_SPINLOCKS
---help---
Enable the collection of statistical data on the behavior of
- paravirtualized queued spinlocks and report them on debugfs.
+ paravirtualized and unfair queued spinlocks and report them
+ on debugfs.

source "arch/x86/xen/Kconfig"

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 65dead9..12e2e89 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -538,7 +538,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
#ifdef queued_spin_trylock
#undef queued_spin_trylock
#endif
-#define queued_spin_trylock queued_spin_trylock_unfair
+#define queued_spin_trylock __queued_spin_trylock_unfair

/*
* The unfair lock code is used internally and so don't need to be exported
diff --git a/kernel/locking/qspinlock_unfair.h b/kernel/locking/qspinlock_unfair.h
index 0e8a40f..fc94578 100644
--- a/kernel/locking/qspinlock_unfair.h
+++ b/kernel/locking/qspinlock_unfair.h
@@ -44,6 +44,93 @@ struct uf_node {
u32 prev_tail; /* Previous node tail code */
};

+#ifdef CONFIG_QUEUED_LOCK_STAT
+
+#include <linux/debugfs.h>
+
+/*
+ * Unfair qspinlock statistics
+ *
+ * Each ustat_inc() call is counted in a bucket chosen by the most significant
+ * set bit of the node's lock stealing period. The first entry in the array is
+ * for the queue head; over-range periods are folded into the last entry.
+ */
+#define NR_LPERIOD_CNTS (LPERIOD_THRESHOLD_SHIFT - LPERIOD_MIN_SHIFT + 6)
+static atomic_t lsteal_cnts[NR_LPERIOD_CNTS];
+
+/*
+ * Number of successful trylocks at the beginning of the slowpath
+ */
+static atomic_t trylock_cnt;
+
+/*
+ * Counts reset flag; a u32 because debugfs_create_bool() accesses it via u32 *
+ */
+static u32 reset_cnts __read_mostly;
+
+/*
+ * Create the "unfair-qspinlock" debugfs directory and its statistics files
+ */
+static int __init unfair_qspinlock_debugfs(void)
+{
+	struct dentry *d_ufqlock = debugfs_create_dir("unfair-qspinlock", NULL);
+
+	if (!d_ufqlock) {
+		printk(KERN_WARNING "Could not create 'unfair-qspinlock' debugfs directory\n");
+		return 0;	/* optional stats: don't litter the debugfs root */
+	}
+	debugfs_create_u32_array("lsteal_cnts", 0444, d_ufqlock,
+				 (u32 *)lsteal_cnts, NR_LPERIOD_CNTS);
+	debugfs_create_u32("trylock_cnt", 0444, d_ufqlock, (u32 *)&trylock_cnt);
+	debugfs_create_bool("reset_cnts", 0644, d_ufqlock, (u32 *)&reset_cnts);
+	return 0;
+}
+fs_initcall(unfair_qspinlock_debugfs);
+
+/*
+ * Zero every statistics counter and clear the pending reset request
+ */
+static noinline void reset_counts(void)
+{
+	atomic_t *cnt;
+
+	reset_cnts = false;
+	atomic_set(&trylock_cnt, 0);
+	for (cnt = lsteal_cnts; cnt < lsteal_cnts + NR_LPERIOD_CNTS; cnt++)
+		atomic_set(cnt, 0);
+}
+
+/*
+ * Increment the unfair qspinlock statistic count for the bucket selected
+ * by the node's lock stealing period
+ */
+static inline void ustat_inc(struct uf_node *pn)
+{
+	const int last = NR_LPERIOD_CNTS - 1;
+	/* fls() yields the 1-based position of the most significant set bit */
+	int bucket = fls(pn->lsteal_period) - LPERIOD_MIN_SHIFT;
+
+	/* NOTE(review): no lower clamp; presumably bucket >= 0 - confirm */
+	atomic_inc(&lsteal_cnts[bucket > last ? last : bucket]);
+	/* A reset requested through debugfs is carried out by the next caller */
+	if (unlikely(reset_cnts))
+		reset_counts();
+}
+
+/* Trylock wrapper that counts every success in trylock_cnt */
+static inline bool __queued_spin_trylock_unfair(struct qspinlock *lock)
+{
+	if (!queued_spin_trylock_unfair(lock))
+		return false;
+	atomic_inc(&trylock_cnt);
+	return true;
+}
+
+#else /* !CONFIG_QUEUED_LOCK_STAT: statistics hooks compile away */
+static inline void ustat_inc(struct uf_node *pn) { }
+#define __queued_spin_trylock_unfair queued_spin_trylock_unfair
+#endif /* CONFIG_QUEUED_LOCK_STAT */
+
/**
* cmpxchg_tail - Put in the new tail code if it matches the old one
* @lock : Pointer to queue spinlock structure
@@ -125,6 +212,7 @@ static inline bool unfair_wait_node(struct qspinlock *lock,
if (queued_spin_trylock_unfair(lock))
break; /* Got the lock */
}
+ ustat_inc(pn);

/*
* Have stolen the lock, need to remove itself from the wait queue.
@@ -220,6 +308,7 @@ unfair_wait_head(struct qspinlock *lock, struct mcs_spinlock *node, u32 tail)
pn->lsteal_period = LPERIOD_QHEAD;
while (!queued_spin_trylock_unfair(lock))
cpu_relax();
+ ustat_inc(pn);

/*
* Remove tail code in the lock if it is the only one in the queue
--
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/