Re: net/ipv4: use-after-free in ipv4_mtu

From: Cong Wang
Date: Wed Apr 05 2017 - 18:34:14 EST


On Tue, Apr 4, 2017 at 7:45 PM, Eric Dumazet <eric.dumazet@xxxxxxxxx> wrote:
> On Tue, 2017-04-04 at 18:11 -0700, Cong Wang wrote:
>> On Tue, Apr 4, 2017 at 11:51 AM, Eric Dumazet <edumazet@xxxxxxxxxx> wrote:
>> > Looking at fib->fib_metrics, I fail to understand how the following can work :
>> >
>> > dst_init_metrics(&rt->dst, fi->fib_metrics, true);
>> >
>> > In the cases fi->fib_metrics is _not_ dst_default_metrics,
>> > fi->fib_metrics can be freed when the fib is deleted,
>> > while dst(s) still have the 'read only pointer'.
>> >
>> > RCU grace period before fi->fib_metrics freeing does not help.
>> >
>> > Without refcounts, it looks like we need to copy the fib_metrics.
>>
>> The dst is obtained from sk_dst_cache which is cached for a fast
>> path where fib_info is obtained in fib_lookup() without refcnt:
>>
>> err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF);
>>
>>
>> ...
>> if (!(fib_flags & FIB_LOOKUP_NOREF))
>> atomic_inc(&fi->fib_clntref);
>>
>>
>> This probably starts from:
>>
>> commit ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9
>> Author: Eric Dumazet <eric.dumazet@xxxxxxxxx>
>> Date: Tue Oct 5 10:41:36 2010 +0000
>>
>> fib: RCU conversion of fib_lookup()
>
> Interesting. I might have had too many beers tonight, but ...
>
> refcount was removed in 2860583fe840 many months later

Good find! I missed the refcnt in rt_set_nexthop() before that commit.

We need to revert that commit to restore the refcnt for fib_info.

diff --git a/include/net/route.h b/include/net/route.h
index c0874c8..3917d0a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -69,6 +69,7 @@ struct rtable {

struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
+ struct fib_info *fi; /* for refcnt to shared metrics */
};

static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8471dd1..514d7e9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1391,6 +1391,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *) dst;

+ if (rt->fi) {
+ fib_info_put(rt->fi);
+ rt->fi = NULL;
+ }
+
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;

@@ -1428,6 +1433,16 @@ static bool rt_cache_valid(const struct rtable *rt)
!rt_is_expired(rt);
}

+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
+{
+ if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+ fib_info_hold(fi);
+ rt->fi = fi;
+ }
+
+ dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+}
+
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
const struct fib_result *res,
struct fib_nh_exception *fnhe,
@@ -1442,7 +1457,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ rt_init_metrics(rt, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1494,6 +1509,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
rt->rt_table_id = 0;
+ rt->fi = NULL;
INIT_LIST_HEAD(&rt->rt_uncached);

rt->dst.output = ip_output;