Re: [RFC][PATCH] spin loop arch primitives for busy waiting

From: Nicholas Piggin
Date: Tue Apr 04 2017 - 00:11:54 EST


On Tue, 4 Apr 2017 13:02:33 +1000
Nicholas Piggin <npiggin@xxxxxxxxx> wrote:

> On Mon, 3 Apr 2017 17:43:05 -0700
> Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>

> > But that depends on architectures having some pattern that we *can*
> > abstract. Would some "begin/in-loop/end" pattern like the above be
> > sufficient?
>
> Yes. begin/in/end would be sufficient for powerpc SMT priority, and
> for x86, and it looks like sparc64 too. So we could do that if you
> prefer.

How's this? I changed your naming a bit just so we have a common spin_
prefix, with an example powerpc implementation and one caller converted
to see the effect.
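
For illustration, converting a typical open-coded busy-wait would look
like this (sketch only; "flag" stands in for whatever condition is being
polled, it's not part of the patch):

	/* before */
	while (!READ_ONCE(flag))
		cpu_relax();

	/* after */
	spin_begin();
	while (!READ_ONCE(flag))
		spin_cpu_relax();
	spin_end();

On powerpc this lowers SMT priority once when entering the loop and
raises it once on exit, instead of toggling it on every iteration the
way the powerpc cpu_relax() does.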

---
 arch/powerpc/include/asm/processor.h | 17 +++++++++++++
 include/linux/processor.h            | 48 ++++++++++++++++++++++++++++++++++++
 kernel/sched/idle.c                  |  7 +++++-
 3 files changed, 71 insertions(+), 1 deletion(-)
create mode 100644 include/linux/processor.h

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e9bbd450d966..1274dc818e74 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -402,6 +402,23 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)

 #ifdef CONFIG_PPC64
 #define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
+
+#ifndef spin_begin
+#define spin_begin() HMT_low()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() barrier()
+#endif
+
+#ifndef spin_cpu_yield
+#define spin_cpu_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end() HMT_medium()
+#endif
+
 #else
 #define cpu_relax() barrier()
 #endif
diff --git a/include/linux/processor.h b/include/linux/processor.h
new file mode 100644
index 000000000000..65e5635d0069
--- /dev/null
+++ b/include/linux/processor.h
@@ -0,0 +1,48 @@
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive, including nested
+ * uses of these primitives. It should not lock or take any other resource.
+ * Violations of this will not cause a bug, but may cause sub-optimal
+ * performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detection of the resource owner, and the decision to spin, sleep, or
+ * guest-yield (e.g., spin lock holder vcpu preempted, or mutex owner not
+ * on CPU), can be done within the busy loop body if necessary.
+ */
+#ifndef spin_begin
+#define spin_begin()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield() cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end()
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
+
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index ac6d5176463d..99a032d9f4a9 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/stackprotector.h>
 #include <linux/suspend.h>
+#include <linux/processor.h>
 
 #include <asm/tlb.h>
 
@@ -63,9 +64,13 @@ static noinline int __cpuidle cpu_idle_poll(void)
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
 	stop_critical_timings();
+
+	spin_begin();
 	while (!tif_need_resched() &&
 		(cpu_idle_force_poll || tick_check_broadcast_expired()))
-		cpu_relax();
+		spin_cpu_relax();
+	spin_end();
+
 	start_critical_timings();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
--
2.11.0