[patch 30/32] Fix AF_UNIX OOPS

From: Chris Wright
Date: Fri Jun 08 2007 - 03:23:16 EST


-stable review patch. If anyone has any objections, please let us know.
---------------------

From: David Miller <davem@xxxxxxxxxxxxx>

This combines two upstream commits to fix an OOPS with
AF_UNIX and SELINUX.

basically, sk->sk_socket can become null because we access
a peer socket without any locking, so it can be shut down and
released in another thread.

Commit: d410b81b4eef2e4409f9c38ef201253fbbcc7d94
[AF_UNIX]: Make socket locking much less confusing.

The unix_state_*() locking macros imply that there is some
rwlock kind of thing going on, but the implementation is
actually a spinlock which makes the code more confusing than
it needs to be.

So use plain unix_state_lock and unix_state_unlock.

Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>

Commit: 19fec3e807a487415e77113cb9dbdaa2da739836
[AF_UNIX]: Fix datagram connect race causing an OOPS.

Based upon an excellent bug report and initial patch by
Frederik Deweerdt.

The UNIX datagram connect code blindly dereferences other->sk_socket
via the call down to the security_unix_may_send() function.

Without locking 'other' that pointer can go NULL via unix_release_sock()
which does sock_orphan() which also marks the socket SOCK_DEAD.

So we have to lock both 'sk' and 'other' yet avoid all kinds of
potential deadlocks (connect to self is OK for datagram sockets and it
is possible for two datagram sockets to perform a simultaneous connect
to each other). So what we do is have a "double lock" function similar
to how we handle this situation in other areas of the kernel. We take
the lock of the socket pointer with the smallest address first in
order to avoid ABBA style deadlocks.

Once we have them both locked, we check to see if SOCK_DEAD is set
for 'other' and if so, drop everything and retry the lookup.

Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
[chrisw: backport to 2.6.20]
Signed-off-by: Chris Wright <chrisw@xxxxxxxxxxxx>

---
include/net/af_unix.h | 8 +--
net/unix/af_unix.c | 127 +++++++++++++++++++++++++++++++-------------------
2 files changed, 83 insertions(+), 52 deletions(-)

--- linux-2.6.20.13.orig/include/net/af_unix.h
+++ linux-2.6.20.13/include/net/af_unix.h
@@ -62,13 +62,11 @@ struct unix_skb_parms {
#define UNIXCREDS(skb) (&UNIXCB((skb)).creds)
#define UNIXSID(skb) (&UNIXCB((skb)).secid)

-#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock)
-#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock)
-#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock)
-#define unix_state_wlock_nested(s) \
+#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
+#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
+#define unix_state_lock_nested(s) \
spin_lock_nested(&unix_sk(s)->lock, \
SINGLE_DEPTH_NESTING)
-#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock)

#ifdef __KERNEL__
/* The AF_UNIX socket */
--- linux-2.6.20.13.orig/net/unix/af_unix.c
+++ linux-2.6.20.13/net/unix/af_unix.c
@@ -175,11 +175,11 @@ static struct sock *unix_peer_get(struct
{
struct sock *peer;

- unix_state_rlock(s);
+ unix_state_lock(s);
peer = unix_peer(s);
if (peer)
sock_hold(peer);
- unix_state_runlock(s);
+ unix_state_unlock(s);
return peer;
}

@@ -370,7 +370,7 @@ static int unix_release_sock (struct soc
unix_remove_socket(sk);

/* Clear state */
- unix_state_wlock(sk);
+ unix_state_lock(sk);
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
dentry = u->dentry;
@@ -379,7 +379,7 @@ static int unix_release_sock (struct soc
u->mnt = NULL;
state = sk->sk_state;
sk->sk_state = TCP_CLOSE;
- unix_state_wunlock(sk);
+ unix_state_unlock(sk);

wake_up_interruptible_all(&u->peer_wait);

@@ -387,12 +387,12 @@ static int unix_release_sock (struct soc

if (skpair!=NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
- unix_state_wlock(skpair);
+ unix_state_lock(skpair);
/* No more writes */
skpair->sk_shutdown = SHUTDOWN_MASK;
if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
skpair->sk_err = ECONNRESET;
- unix_state_wunlock(skpair);
+ unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
read_lock(&skpair->sk_callback_lock);
sk_wake_async(skpair,1,POLL_HUP);
@@ -449,7 +449,7 @@ static int unix_listen(struct socket *so
err = -EINVAL;
if (!u->addr)
goto out; /* No listens on an unbound socket */
- unix_state_wlock(sk);
+ unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
goto out_unlock;
if (backlog > sk->sk_max_ack_backlog)
@@ -463,7 +463,7 @@ static int unix_listen(struct socket *so
err = 0;

out_unlock:
- unix_state_wunlock(sk);
+ unix_state_unlock(sk);
out:
return err;
}
@@ -859,6 +859,31 @@ out_mknod_parent:
goto out_up;
}

+static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
+{
+ if (unlikely(sk1 == sk2) || !sk2) {
+ unix_state_lock(sk1);
+ return;
+ }
+ if (sk1 < sk2) {
+ unix_state_lock(sk1);
+ unix_state_lock_nested(sk2);
+ } else {
+ unix_state_lock(sk2);
+ unix_state_lock_nested(sk1);
+ }
+}
+
+static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
+{
+ if (unlikely(sk1 == sk2) || !sk2) {
+ unix_state_unlock(sk1);
+ return;
+ }
+ unix_state_unlock(sk1);
+ unix_state_unlock(sk2);
+}
+
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
@@ -878,11 +903,19 @@ static int unix_dgram_connect(struct soc
!unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
goto out;

+restart:
other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
if (!other)
goto out;

- unix_state_wlock(sk);
+ unix_state_double_lock(sk, other);
+
+ /* Apparently VFS overslept socket death. Retry. */
+ if (sock_flag(other, SOCK_DEAD)) {
+ unix_state_double_unlock(sk, other);
+ sock_put(other);
+ goto restart;
+ }

err = -EPERM;
if (!unix_may_send(sk, other))
@@ -897,7 +930,7 @@ static int unix_dgram_connect(struct soc
* 1003.1g breaking connected state with AF_UNSPEC
*/
other = NULL;
- unix_state_wlock(sk);
+ unix_state_double_lock(sk, other);
}

/*
@@ -906,19 +939,19 @@ static int unix_dgram_connect(struct soc
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
unix_peer(sk)=other;
- unix_state_wunlock(sk);
+ unix_state_double_unlock(sk, other);

if (other != old_peer)
unix_dgram_disconnected(sk, old_peer);
sock_put(old_peer);
} else {
unix_peer(sk)=other;
- unix_state_wunlock(sk);
+ unix_state_double_unlock(sk, other);
}
return 0;

out_unlock:
- unix_state_wunlock(sk);
+ unix_state_double_unlock(sk, other);
sock_put(other);
out:
return err;
@@ -937,7 +970,7 @@ static long unix_wait_for_peer(struct so
(skb_queue_len(&other->sk_receive_queue) >
other->sk_max_ack_backlog);

- unix_state_runlock(other);
+ unix_state_unlock(other);

if (sched)
timeo = schedule_timeout(timeo);
@@ -995,11 +1028,11 @@ restart:
goto out;

/* Latch state of peer */
- unix_state_rlock(other);
+ unix_state_lock(other);

/* Apparently VFS overslept socket death. Retry. */
if (sock_flag(other, SOCK_DEAD)) {
- unix_state_runlock(other);
+ unix_state_unlock(other);
sock_put(other);
goto restart;
}
@@ -1049,18 +1082,18 @@ restart:
goto out_unlock;
}

- unix_state_wlock_nested(sk);
+ unix_state_lock_nested(sk);

if (sk->sk_state != st) {
- unix_state_wunlock(sk);
- unix_state_runlock(other);
+ unix_state_unlock(sk);
+ unix_state_unlock(other);
sock_put(other);
goto restart;
}

err = security_unix_stream_connect(sock, other->sk_socket, newsk);
if (err) {
- unix_state_wunlock(sk);
+ unix_state_unlock(sk);
goto out_unlock;
}

@@ -1097,7 +1130,7 @@ restart:
smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
unix_peer(sk) = newsk;

- unix_state_wunlock(sk);
+ unix_state_unlock(sk);

/* take ten and and send info to listening sock */
spin_lock(&other->sk_receive_queue.lock);
@@ -1106,14 +1139,14 @@ restart:
* is installed to listening socket. */
atomic_inc(&newu->inflight);
spin_unlock(&other->sk_receive_queue.lock);
- unix_state_runlock(other);
+ unix_state_unlock(other);
other->sk_data_ready(other, 0);
sock_put(other);
return 0;

out_unlock:
if (other)
- unix_state_runlock(other);
+ unix_state_unlock(other);

out:
if (skb)
@@ -1179,10 +1212,10 @@ static int unix_accept(struct socket *so
wake_up_interruptible(&unix_sk(sk)->peer_wait);

/* attach accepted sock to socket */
- unix_state_wlock(tsk);
+ unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
sock_graft(tsk, newsock);
- unix_state_wunlock(tsk);
+ unix_state_unlock(tsk);
return 0;

out:
@@ -1209,7 +1242,7 @@ static int unix_getname(struct socket *s
}

u = unix_sk(sk);
- unix_state_rlock(sk);
+ unix_state_lock(sk);
if (!u->addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
@@ -1220,7 +1253,7 @@ static int unix_getname(struct socket *s
*uaddr_len = addr->len;
memcpy(sunaddr, addr->name, *uaddr_len);
}
- unix_state_runlock(sk);
+ unix_state_unlock(sk);
sock_put(sk);
out:
return err;
@@ -1338,7 +1371,7 @@ restart:
goto out_free;
}

- unix_state_rlock(other);
+ unix_state_lock(other);
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
@@ -1348,20 +1381,20 @@ restart:
* Check with 1003.1g - what should
* datagram error
*/
- unix_state_runlock(other);
+ unix_state_unlock(other);
sock_put(other);

err = 0;
- unix_state_wlock(sk);
+ unix_state_lock(sk);
if (unix_peer(sk) == other) {
unix_peer(sk)=NULL;
- unix_state_wunlock(sk);
+ unix_state_unlock(sk);

unix_dgram_disconnected(sk, other);
sock_put(other);
err = -ECONNREFUSED;
} else {
- unix_state_wunlock(sk);
+ unix_state_unlock(sk);
}

other = NULL;
@@ -1398,14 +1431,14 @@ restart:
}

skb_queue_tail(&other->sk_receive_queue, skb);
- unix_state_runlock(other);
+ unix_state_unlock(other);
other->sk_data_ready(other, len);
sock_put(other);
scm_destroy(siocb->scm);
return len;

out_unlock:
- unix_state_runlock(other);
+ unix_state_unlock(other);
out_free:
kfree_skb(skb);
out:
@@ -1495,14 +1528,14 @@ static int unix_stream_sendmsg(struct ki
goto out_err;
}

- unix_state_rlock(other);
+ unix_state_lock(other);

if (sock_flag(other, SOCK_DEAD) ||
(other->sk_shutdown & RCV_SHUTDOWN))
goto pipe_err_free;

skb_queue_tail(&other->sk_receive_queue, skb);
- unix_state_runlock(other);
+ unix_state_unlock(other);
other->sk_data_ready(other, size);
sent+=size;
}
@@ -1513,7 +1546,7 @@ static int unix_stream_sendmsg(struct ki
return sent;

pipe_err_free:
- unix_state_runlock(other);
+ unix_state_unlock(other);
kfree_skb(skb);
pipe_err:
if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
@@ -1642,7 +1675,7 @@ static long unix_stream_data_wait(struct
{
DEFINE_WAIT(wait);

- unix_state_rlock(sk);
+ unix_state_lock(sk);

for (;;) {
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
@@ -1655,14 +1688,14 @@ static long unix_stream_data_wait(struct
break;

set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- unix_state_runlock(sk);
+ unix_state_unlock(sk);
timeo = schedule_timeout(timeo);
- unix_state_rlock(sk);
+ unix_state_lock(sk);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}

finish_wait(sk->sk_sleep, &wait);
- unix_state_runlock(sk);
+ unix_state_unlock(sk);
return timeo;
}

@@ -1817,12 +1850,12 @@ static int unix_shutdown(struct socket *
mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

if (mode) {
- unix_state_wlock(sk);
+ unix_state_lock(sk);
sk->sk_shutdown |= mode;
other=unix_peer(sk);
if (other)
sock_hold(other);
- unix_state_wunlock(sk);
+ unix_state_unlock(sk);
sk->sk_state_change(sk);

if (other &&
@@ -1834,9 +1867,9 @@ static int unix_shutdown(struct socket *
peer_mode |= SEND_SHUTDOWN;
if (mode&SEND_SHUTDOWN)
peer_mode |= RCV_SHUTDOWN;
- unix_state_wlock(other);
+ unix_state_lock(other);
other->sk_shutdown |= peer_mode;
- unix_state_wunlock(other);
+ unix_state_unlock(other);
other->sk_state_change(other);
read_lock(&other->sk_callback_lock);
if (peer_mode == SHUTDOWN_MASK)
@@ -1974,7 +2007,7 @@ static int unix_seq_show(struct seq_file
else {
struct sock *s = v;
struct unix_sock *u = unix_sk(s);
- unix_state_rlock(s);
+ unix_state_lock(s);

seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
s,
@@ -2002,7 +2035,7 @@ static int unix_seq_show(struct seq_file
for ( ; i < len; i++)
seq_putc(seq, u->addr->name->sun_path[i]);
}
- unix_state_runlock(s);
+ unix_state_unlock(s);
seq_putc(seq, '\n');
}


--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/