[PATCH 3.15 045/109] ipv4: fix dst race in sk_dst_get()

From: Greg Kroah-Hartman
Date: Sat Jul 26 2014 - 15:21:20 EST


3.15-stable review patch. If anyone has any objections, please let me know.

------------------

From: Eric Dumazet <edumazet@xxxxxxxxxx>

[ Upstream commit f88649721268999bdff09777847080a52004f691 ]

When IP route cache had been removed in linux-3.6, we broke assumption
that dst entries were all freed after rcu grace period. DST_NOCACHE
dst were supposed to be freed from dst_release(). But it appears
we want to keep such dst around, either in UDP sockets or tunnels.

In sk_dst_get() we need to make sure dst refcount is not 0
before incrementing it, or else we might end up freeing a dst
twice.

DST_NOCACHE set on a dst does not mean this dst can not be attached
to a socket or a tunnel.

Then, before actual freeing, we need to observe a rcu grace period
to make sure all other cpus can catch the fact the dst is no longer
usable.

Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Reported-by: Dormando <dormando@xxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
include/net/sock.h | 4 ++--
net/core/dst.c | 16 +++++++++++-----
net/ipv4/ip_tunnel.c | 14 +++++---------
3 files changed, 18 insertions(+), 16 deletions(-)

--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1728,8 +1728,8 @@ sk_dst_get(struct sock *sk)

rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
- if (dst)
- dst_hold(dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
rcu_read_unlock();
return dst;
}
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -269,6 +269,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);

+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst)

newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_t
{
struct dst_entry *old_dst;

- if (dst) {
- if (dst->flags & DST_NOCACHE)
- dst = NULL;
- else
- dst_clone(dst);
- }
+ dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
}
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(

rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
if (dst) {
if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- rcu_read_unlock();
tunnel_dst_reset(t);
- return NULL;
+ dst_release(dst);
+ dst = NULL;
}
- dst_hold(dst);
}
rcu_read_unlock();
return (struct rtable *)dst;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/