[cpuops cmpxchg V2 2/5] x86: this_cpu_cmpxchg and this_cpu_xchg operations

From: Christoph Lameter
Date: Tue Dec 14 2010 - 11:29:51 EST


Provide support as far as the hardware capabilities of the x86 cpus
allow.

Define CONFIG_CMPXCHG_LOCAL in Kconfig.cpu to allow core code to test for
fast cpuops implementations.

V1->V2:
- Take out the definition for this_cpu_cmpxchg_8 and move it into
a separate patch.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>

---
arch/x86/Kconfig.cpu | 3 +
arch/x86/include/asm/percpu.h | 109 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 111 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h 2010-12-10 12:18:46.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h 2010-12-10 12:42:18.000000000 -0600
@@ -212,6 +212,83 @@ do { \
ret__; \
})

+/*
+ * Store nval in the per cpu variable var and return the previous value.
+ * xchg leaves the old memory content in its register operand, so that
+ * register is a read-write ("+") operand: nval goes in, the old value
+ * comes out. The pxo_ prefix keeps the local clear of caller names.
+ *
+ * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
+ * full lock semantics even though they are not needed.
+ */
+#define percpu_xchg_op(var, nval) \
+({ \
+ typeof(var) pxo_ret__ = (nval); \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("xchgb %0, "__percpu_arg(1) \
+ : "+q" (pxo_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 2: \
+ asm("xchgw %0, "__percpu_arg(1) \
+ : "+r" (pxo_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 4: \
+ asm("xchgl %0, "__percpu_arg(1) \
+ : "+r" (pxo_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 8: \
+ asm("xchgq %0, "__percpu_arg(1) \
+ : "+r" (pxo_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pxo_ret__; \
+})
+
+/*
+ * If the per cpu variable var equals oval, replace it with nval. Always
+ * returns the value var held before the instruction, so the swap took
+ * place iff the result equals oval.
+ *
+ * cmpxchg has no implied lock semantics, unlike xchg. As a result it is
+ * much more efficient for cpu local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval) \
+({ \
+ typeof(var) pco_ret__; \
+ typeof(var) pco_old__ = (oval); \
+ typeof(var) pco_new__ = (nval); \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("cmpxchgb %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "q" (pco_new__), "0" (pco_old__) : "memory"); \
+ break; \
+ case 2: \
+ asm("cmpxchgw %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) : "memory"); \
+ break; \
+ case 4: \
+ asm("cmpxchgl %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) : "memory"); \
+ break; \
+ case 8: \
+ asm("cmpxchgq %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pco_ret__; \
+})
+
#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) pfo_ret__; \
@@ -335,6 +412,17 @@ do { \
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)

+/*
+ * __this_cpu_xchg_[1-4] are deliberately not defined here: the generic
+ * fallback is okay and much faster than an xchg with forced lock semantics.
+ */
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+
#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
@@ -342,7 +430,18 @@ do { \
#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#endif
+/* M386 builds lack X86_CMPXCHG (see Kconfig.cpu): keep under !CONFIG_M386. */
+#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
+
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
@@ -370,6 +469,14 @@ do { \
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)

+#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+/* NOTE(review): __this_cpu_xchg_8 uses locked xchg, unlike 1/2/4; confirm. */
+#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
#endif

/* This is not atomic against other CPUs -- CPU preemption needs to be off */
Index: linux-2.6/arch/x86/Kconfig.cpu
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.cpu 2010-12-10 12:18:46.000000000 -0600
+++ linux-2.6/arch/x86/Kconfig.cpu 2010-12-10 12:18:49.000000000 -0600
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)

+config CMPXCHG_LOCAL
+ def_bool X86_64 || (X86_32 && !M386)
+
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/