[cpuops cmpxchg V1 2/4] x86: this_cpu_cmpxchg and this_cpu_xchg operations

From: Christoph Lameter
Date: Wed Dec 08 2010 - 12:56:52 EST


Provide support as far as the hardware capabilities of the x86 cpus
allow.

Define CONFIG_CMPXCHG_LOCAL in Kconfig.cpu to allow core code to test for
fast cpuops implementations.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>

---
arch/x86/Kconfig.cpu | 3
arch/x86/include/asm/percpu.h | 129 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 131 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h 2010-12-08 11:33:48.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h 2010-12-08 11:34:44.000000000 -0600
@@ -212,6 +212,83 @@ do { \
ret__; \
})

+/*
+ * percpu_xchg_op(var, nval): store nval into the per-cpu variable var and
+ * evaluate to the value var held before the store.
+ *
+ * xchg swaps its register operand with memory, so the previous value comes
+ * back in that same register. The output is therefore tied to the input
+ * operand ("0") instead of being pinned to the accumulator, which xchg
+ * never writes.
+ *
+ * The locals carry a pxo_ prefix so that they cannot capture an identically
+ * named expression passed in by the caller.
+ *
+ * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
+ * full lock semantics even though they are not needed.
+ */
+#define percpu_xchg_op(var, nval)					\
+({									\
+	typeof(var) pxo_ret__;						\
+	typeof(var) pxo_new__ = (nval);					\
+	switch (sizeof(var)) {						\
+	case 1:								\
+		/* "q": the operand must be a byte-addressable register */ \
+		asm("xchgb %0, "__percpu_arg(1)				\
+		    : "=q" (pxo_ret__), "+m" (var)			\
+		    : "0" (pxo_new__)					\
+		    : "memory");					\
+		break;							\
+	case 2:								\
+		asm("xchgw %0, "__percpu_arg(1)				\
+		    : "=r" (pxo_ret__), "+m" (var)			\
+		    : "0" (pxo_new__)					\
+		    : "memory");					\
+		break;							\
+	case 4:								\
+		asm("xchgl %0, "__percpu_arg(1)				\
+		    : "=r" (pxo_ret__), "+m" (var)			\
+		    : "0" (pxo_new__)					\
+		    : "memory");					\
+		break;							\
+	case 8:								\
+		asm("xchgq %0, "__percpu_arg(1)				\
+		    : "=r" (pxo_ret__), "+m" (var)			\
+		    : "0" (pxo_new__)					\
+		    : "memory");					\
+		break;							\
+	default: __bad_percpu_size();					\
+	}								\
+	pxo_ret__;							\
+})
+
+/*
+ * percpu_cmpxchg_op(var, oval, nval): compare the per-cpu variable var with
+ * oval and, if they match, replace it with nval. Evaluates to the value
+ * that was found in var, which equals oval when the exchange took place.
+ *
+ * Unlike xchg, cmpxchg carries no implied lock semantics. As a result it is
+ * much more efficient for cpu local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval)				\
+({									\
+	typeof(var) pco_new__ = (nval);					\
+	typeof(var) pco_old__ = (oval);					\
+	typeof(var) pco_ret__;						\
+	switch (sizeof(var)) {						\
+	case 1:								\
+		asm("cmpxchgb %2, "__percpu_arg(1)			\
+		    : "=a" (pco_ret__), "+m" (var)			\
+		    : "q" (pco_new__), "0" (pco_old__)			\
+		    : "memory");					\
+		break;							\
+	case 2:								\
+		asm("cmpxchgw %2, "__percpu_arg(1)			\
+		    : "=a" (pco_ret__), "+m" (var)			\
+		    : "r" (pco_new__), "0" (pco_old__)			\
+		    : "memory");					\
+		break;							\
+	case 4:								\
+		asm("cmpxchgl %2, "__percpu_arg(1)			\
+		    : "=a" (pco_ret__), "+m" (var)			\
+		    : "r" (pco_new__), "0" (pco_old__)			\
+		    : "memory");					\
+		break;							\
+	case 8:								\
+		asm("cmpxchgq %2, "__percpu_arg(1)			\
+		    : "=a" (pco_ret__), "+m" (var)			\
+		    : "r" (pco_new__), "0" (pco_old__)			\
+		    : "memory");					\
+		break;							\
+	default: __bad_percpu_size();					\
+	}								\
+	pco_ret__;							\
+})
+
#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) pfo_ret__; \
@@ -335,6 +412,16 @@ do { \
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)

+/*
+ * Size-specific xchg entry points for 1, 2 and 4 byte per-cpu variables.
+ * The __this_cpu, this_cpu and irqsafe_cpu flavours all expand to
+ * percpu_xchg_op().
+ */
+#define __this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define __this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define __this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+
#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
@@ -342,7 +429,39 @@ do { \
#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#endif
+
+/*
+ * Size-specific cmpxchg entry points for 1, 2 and 4 byte per-cpu variables.
+ * The __this_cpu, this_cpu and irqsafe_cpu flavours all expand to
+ * percpu_cmpxchg_op(). These sit inside the !CONFIG_M386 section,
+ * presumably because the 386 has no cmpxchg instruction.
+ */
+#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
+
+#ifndef CONFIG_X86_64
+#ifdef CONFIG_X86_CMPXCHG64
+/*
+ * 32 bit: an 8 byte cmpxchg is available through the cmpxchg8b instruction.
+ *
+ * __this_cpu_cmpxchg_8(pcp, oval, nval): compare the 8 byte per-cpu variable
+ * pcp with oval and, if they match, replace it with nval. Evaluates to the
+ * value that was found in pcp, which equals oval when the exchange took
+ * place.
+ *
+ * cmpxchg8b names only the memory operand; the registers are implicit:
+ *   edx:eax - expected value on entry, previous value on exit ("A")
+ *   ecx:ebx - new value, high half in ecx ("c"), low half in ebx ("b")
+ *
+ * The new value is widened to u64 before the shift so that the high half is
+ * extracted the same way for signed and unsigned 8 byte types.
+ */
+#define __this_cpu_cmpxchg_8(pcp, oval, nval)				\
+({									\
+	typeof(pcp) pco8_ret__;						\
+	typeof(pcp) pco8_old__ = (oval);				\
+	typeof(pcp) pco8_new__ = (nval);				\
+	asm("cmpxchg8b "__percpu_arg(1)					\
+	    : "=A" (pco8_ret__), "+m" (pcp)				\
+	    : "b" ((u32)pco8_new__),					\
+	      "c" ((u32)((u64)pco8_new__ >> 32)),			\
+	      "0" (pco8_old__)						\
+	    : "memory");						\
+	pco8_ret__;							\
+})
+
+/* The operation is a single instruction, so all flavours share it. */
+#define this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_cmpxchg_8(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_cmpxchg_8(pcp, oval, nval)
+
+#endif /* CONFIG_X86_CMPXCHG64 */
+#endif /* !CONFIG_X86_64 */
+
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
@@ -370,6 +489,14 @@ do { \
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)

+/*
+ * 8 byte xchg and cmpxchg entry points; every flavour expands to the
+ * size-switched percpu_xchg_op() / percpu_cmpxchg_op().
+ * NOTE(review): presumably inside the CONFIG_X86_64 section - confirm.
+ */
+#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+
+#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
#endif

/* This is not atomic against other CPUs -- CPU preemption needs to be off */
Index: linux-2.6/arch/x86/Kconfig.cpu
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.cpu 2010-12-08 11:33:48.000000000 -0600
+++ linux-2.6/arch/x86/Kconfig.cpu 2010-12-08 11:33:53.000000000 -0600
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)

+# Lets core code test for fast cpuops implementations; enabled under the same
+# condition as X86_CMPXCHG.
+config CMPXCHG_LOCAL
+ def_bool X86_64 || (X86_32 && !M386)
+
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/