Re: futex atomic vs ordering constraints

From: Peter Zijlstra
Date: Wed Sep 02 2015 - 08:56:32 EST



So here goes..

Chris, I'm awfully sorry, but I seem to be Tile challenged.

TileGX seems to define:

#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()

However, its atomic_add_return() implementation looks like:

static inline int atomic_add_return(int i, atomic_t *v)
{
int val;
smp_mb(); /* barrier for proper semantics */
val = __insn_fetchadd4((void *)&v->counter, i) + i;
barrier(); /* the "+ i" above will wait on memory */
return val;
}

Which leaves me confused on smp_mb__after_atomic().

That said, your futex ops seem to lack any memory barrier, so naively
I'd add both; it's just that your add_return() confuses me.

---
Subject: futex, arch: Make futex atomic ops fully ordered

There was no clear ordering requirement for the two futex atomic
primitives, which led to various archs implementing different things.

Since there currently is no clear performance argument (it's the syscall
'slow' path), favouring fully-ordered semantics over added complexity
seems the right call.

This patch adds the new ordering requirements to the function comments
(which it places in a more visible location) and updates all arches that
were not already fully ordered (I checked those that have
smp_mb__{before,after}_atomic() definitions, as those are the only ones
that can actually be more relaxed).

In particular:

futex_atomic_op_inuser():

alpha: MB ll/sc (RELEASE) -> MB ll/sc MB
arm: MB ll/sc (RELEASE) -> MB ll/sc MB
mips: ll/sc MB (ACQUIRE) -> MB ll/sc MB

futex_atomic_cmpxchg_inatomic():

alpha: MB ll/sc (RELEASE) -> MB ll/sc MB
mips: ll/sc MB (ACQUIRE) -> MB ll/sc MB

XXX tile (again).

Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Cc: Russell King <linux@xxxxxxxxxxxxxxxx>
Cc: Richard Henderson <rth@xxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/alpha/include/asm/futex.h | 10 +++++++---
arch/arm/include/asm/futex.h | 3 ++-
arch/mips/include/asm/futex.h | 4 ++++
include/asm-generic/futex.h | 26 +-------------------------
include/linux/futex.h | 35 +++++++++++++++++++++++++++++++++++
5 files changed, 49 insertions(+), 29 deletions(-)

diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index f939794363ac..e0d5edad2cd4 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -9,8 +9,8 @@
#include <asm/barrier.h>

#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+ smp_mb(); \
__asm__ __volatile__( \
- __ASM_SMP_MB \
"1: ldl_l %0,0(%2)\n" \
insn \
"2: stl_c %1,0(%2)\n" \
@@ -27,7 +27,8 @@
" .previous\n" \
: "=&r" (oldval), "=&r"(ret) \
: "r" (uaddr), "r"(oparg) \
- : "memory")
+ : "memory"); \
+ smp_mb()

static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
@@ -90,8 +91,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

+ smp_mb();
+
__asm__ __volatile__ (
- __ASM_SMP_MB
"1: ldl_l %1,0(%3)\n"
" cmpeq %1,%4,%2\n"
" beq %2,3f\n"
@@ -111,6 +113,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "r"(uaddr), "r"((long)(int)oldval), "r"(newval)
: "memory");

+ smp_mb();
+
*uval = prev;
return ret;
}
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 5eed82809d82..1be3be8eb8c2 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -34,7 +34,8 @@
__futex_atomic_ex_table("%5") \
: "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \
: "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \
- : "cc", "memory")
+ : "cc", "memory"); \
+ smp_mb()

static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 1de190bdfb9c..2f38cc84fb04 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -20,6 +20,8 @@

#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
+ smp_mb__before_llsc(); \
+ \
if (cpu_has_llsc && R10000_LLSC_WAR) { \
__asm__ __volatile__( \
" .set push \n" \
@@ -149,6 +151,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

+ smp_mb__before_llsc();
+
if (cpu_has_llsc && R10000_LLSC_WAR) {
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index e56272c919b5..e3c2a0cea438 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -12,18 +12,6 @@
*
*/

-/**
- * futex_atomic_op_inuser() - Atomic arithmetic operation with constant
- * argument and comparison of the previous
- * futex value with another constant.
- *
- * @encoded_op: encoded operation to execute
- * @uaddr: pointer to user space address
- *
- * Return:
- * 0 - On success
- * <0 - On error
- */
static inline int
futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
@@ -88,19 +76,6 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
return ret;
}

-/**
- * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
- * uaddr with newval if the current value is
- * oldval.
- * @uval: pointer to store content of @uaddr
- * @uaddr: pointer to user space address
- * @oldval: old value
- * @newval: new value to store to @uaddr
- *
- * Return:
- * 0 - On success
- * <0 - On error
- */
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
@@ -121,6 +96,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
}

#else
+
static inline int
futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 6435f46d6e13..fadb46f38800 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -55,6 +55,41 @@ union futex_key {
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr);
+
+/**
+ * futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ * argument and comparison of the previous
+ * futex value with another constant.
+ *
+ * @encoded_op: encoded operation to execute
+ * @uaddr: pointer to user space address
+ *
+ * This operation is assumed to be fully ordered.
+ *
+ * Return:
+ * 0 - On success
+ * <0 - On error
+ */
+extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
+
+/**
+ * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ * uaddr with newval if the current value is
+ * oldval.
+ * @uval: pointer to store content of @uaddr
+ * @uaddr: pointer to user space address
+ * @oldval: old value
+ * @newval: new value to store to @uaddr
+ *
+ * This operation is assumed to be fully ordered.
+ *
+ * Return:
+ * 0 - On success
+ * <0 - On error
+ */
+extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval);
+
#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
#define futex_cmpxchg_enabled 1
#else
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/