Re: unregister_netdevice: waiting for eth0 to become free. Usage count = 1
From: Wei Wang
Date: Fri Aug 11 2017 - 13:26:02 EST
On Fri, Aug 11, 2017 at 9:48 AM, Cong Wang <xiyou.wangcong@xxxxxxxxx> wrote:
> Hi,
>
> On Thu, Aug 10, 2017 at 11:12 AM, John Stultz <john.stultz@xxxxxxxxxx> wrote:
>> On Wed, Aug 9, 2017 at 10:41 PM, Wei Wang <weiwan@xxxxxxxxxx> wrote:
>>> Hi John,
>>>
>>> Is it possible to try the attached patch?
>>
>> Thanks so much for the quick turn around!
>>
>> So I dropped all the reverts you suggested, and applied this one
>> against 4.13-rc4, but I'm still seeing the problematic behavior.
>
> Does the following one-line fix make a difference?
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index a640fbcba15d..c145a35763a0 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -141,7 +141,7 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
> struct uncached_list *ul = rt->rt6i_uncached_list;
>
> spin_lock_bh(&ul->lock);
> - list_del(&rt->rt6i_uncached);
> + list_del_init(&rt->rt6i_uncached);
> spin_unlock_bh(&ul->lock);
> }
> }
Thanks a lot Cong for proposing this fix.
For the last few days, John has been helping me running debug image
and we found out that the leaked dst is probably in addrconf.c.
Martin and I are looking through the code and trying to put more debugs.
John,
If after Cong's fix, the issue still happens, could you help try the
patch attached and collect all logs when you try the reproduce the
issue? It would be great to have logs for both success case and the
failure case.
Thanks so much for your help.
Wei
From 0ff591e00eac13888c5ee9c84ee51b286b27389d Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@xxxxxxxxxx>
Date: Thu, 10 Aug 2017 21:12:32 -0700
Subject: [PATCH] ipv6: unregister_netdevice debug
Change-Id: I97b044b957a146de480e5427212c1ce80bc3dd3c
---
include/net/dst.h | 7 +++++++
include/net/dst_ops.h | 1 +
include/net/ip6_fib.h | 10 ++++++++++
net/core/dev.c | 16 +++++++++++++++
net/core/dst.c | 10 ++++++++--
net/ipv6/addrconf.c | 24 +++++++++++++++++++---
net/ipv6/route.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++---
7 files changed, 115 insertions(+), 8 deletions(-)
diff --git a/include/net/dst.h b/include/net/dst.h
index f73611ec4017..48d9f0322492 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -56,6 +56,13 @@ struct dst_entry {
#define DST_XFRM_TUNNEL 0x0020
#define DST_XFRM_QUEUE 0x0040
#define DST_METADATA 0x0080
+#define RTF_ICMPV6_DST 0x0100
+#define RTF_ADDRCONF_DST 0x0200
+#define RTF_UNCACHED_DST 0x0400
+#define RTF_CACHE_DST 0x0800
+#define RTF_FIB6_DST 0x1000
+#define RTF_PCPU_DST 0x2000
+
short error;
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index c84b3287e38b..eef7d6b6f3cd 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -39,6 +39,7 @@ struct dst_ops {
struct kmem_cache *kmem_cachep;
struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp;
+ void (*do_account)(struct dst_entry *dst);
};
static inline int dst_entries_get_fast(struct dst_ops *dst)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a88008cc6f5..af382d123f63 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -214,6 +214,16 @@ struct rt6_statistics {
__u32 fib_rt_entries; /* rt entries in table */
__u32 fib_rt_cache; /* cache routes */
__u32 fib_discarded_routes;
+ __u32 icmpv6_dst;
+ __u32 addrconf_dst;
+ __u32 fib6_dst;
+ __u32 cache_dst;
+ __u32 uncached_dst;
+ __u32 pcpu_dst;
+ __u32 dev_free_fib6;
+ __u32 dev_free_uncached;
+ __u32 dev_free_icmpv6;
+ __u32 dev_free_addrconf;
};
#define RTN_TL_ROOT 0x0001
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ea6b4b42611..5c6fea78003f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1,3 +1,4 @@
+
/*
* NET3 Protocol independent device support routines.
*
@@ -7738,8 +7739,23 @@ static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
int refcnt;
+ struct net *net = dev_net(dev);
linkwatch_forget_dev(dev);
+ printk("%s: dev_name %s, icmpv6_dst %u, addrconf_dst %u, fib6_dst %u, pcpu_dst %u, cache_dst %u, uncached_list %u, dev_free_fib6 %u, dev_free_uncached %u, dev_free_icmpv6 %u, dev_free_addrconf %u, fib_rt_entries %u\n", __func__,
+ dev->name,
+ net->ipv6.rt6_stats->icmpv6_dst,
+ net->ipv6.rt6_stats->addrconf_dst,
+ net->ipv6.rt6_stats->fib6_dst,
+ net->ipv6.rt6_stats->pcpu_dst,
+ net->ipv6.rt6_stats->cache_dst,
+ net->ipv6.rt6_stats->uncached_dst,
+ net->ipv6.rt6_stats->dev_free_fib6,
+ net->ipv6.rt6_stats->dev_free_uncached,
+ net->ipv6.rt6_stats->dev_free_icmpv6,
+ net->ipv6.rt6_stats->dev_free_addrconf,
+ net->ipv6.rt6_stats->fib_rt_entries);
+
rebroadcast_time = warning_time = jiffies;
refcnt = netdev_refcnt_read(dev);
diff --git a/net/core/dst.c b/net/core/dst.c
index 00aa972ad1a1..8c20deec398b 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -184,8 +184,11 @@ void dst_release(struct dst_entry *dst)
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (!newrefcnt)
+ if (!newrefcnt) {
call_rcu(&dst->rcu_head, dst_destroy_rcu);
+ if(dst->ops->do_account)
+ dst->ops->do_account(dst);
+ }
}
}
EXPORT_SYMBOL(dst_release);
@@ -199,8 +202,11 @@ void dst_release_immediate(struct dst_entry *dst)
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (!newrefcnt)
+ if (!newrefcnt) {
dst_destroy(dst);
+ if(dst->ops->do_account)
+ dst->ops->do_account(dst);
+ }
}
}
EXPORT_SYMBOL(dst_release_immediate);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..4625425378ae 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -95,7 +95,7 @@
#include <linux/export.h>
/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
+#define ACONF_DEBUG 3
#if ACONF_DEBUG >= 3
#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
@@ -108,6 +108,8 @@
#define IPV6_MAX_STRLEN \
sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
+static int num_ifa;
+
static inline u32 cstamp_delta(unsigned long cstamp)
{
return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
@@ -923,9 +925,11 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
pr_warn("Freeing alive inet6 address %p\n", ifp);
return;
}
+ printk("%s: dst refcnt %d", __func__, atomic_read(&(ifp->rt->dst.__refcnt)));
ip6_rt_put(ifp->rt);
kfree_rcu(ifp, rcu);
+ num_ifa--;
}
static void
@@ -1016,6 +1020,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
+ num_ifa++;
+
rt = addrconf_dst_alloc(idev, addr, false);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -1076,6 +1082,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
inet6addr_notifier_call_chain(NETDEV_UP, ifa);
else {
kfree(ifa);
+ num_ifa--;
in6_dev_put(idev);
ifa = ERR_PTR(err);
}
@@ -3598,6 +3605,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
int _keep_addr;
bool keep_addr;
int state, i;
+ int err;
ASSERT_RTNL();
@@ -3679,12 +3687,14 @@ static int addrconf_ifdown(struct net_device *dev, int how)
spin_unlock_bh(&ifa->lock);
in6_ifa_put(ifa);
write_lock_bh(&idev->lock);
+ printk("%s: release idev->tempaddr_list entries\n", __func__);
}
/* re-combine the user config with event to determine if permanent
* addresses are to be removed from the interface list
*/
keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
+ printk("%s: num_ifa %d\n", __func__, num_ifa);
INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
@@ -3710,15 +3720,20 @@ static int addrconf_ifdown(struct net_device *dev, int how)
rt = ifa->rt;
ifa->rt = NULL;
+ printk("%s: keep ifa\n", __func__);
} else {
state = ifa->state;
ifa->state = INET6_IFADDR_STATE_DEAD;
+ printk("%s: not keep ifa\n", __func__);
}
spin_unlock_bh(&ifa->lock);
- if (rt)
- ip6_del_rt(rt);
+ if (rt) {
+ err = ip6_del_rt(rt);
+ if (err)
+ printk("%s: ip6_del_rt() failed\n", __func__);
+ }
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -3741,7 +3756,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
list_del(&ifa->if_list);
in6_ifa_put(ifa);
+ printk("%s: delete ifa from del_list\n", __func__);
}
+ printk("%s: num_ifa %d\n", __func__, num_ifa);
/* Step 5: Discard anycast and multicast list */
if (how) {
@@ -6566,6 +6583,7 @@ int __init addrconf_init(void)
err = -ENOMEM;
goto out_nowq;
}
+ num_ifa = 0;
/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96a819d..422c39d8cc44 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -172,6 +172,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
rt->dst.dev = loopback_dev;
dev_hold(rt->dst.dev);
dev_put(rt_dev);
+ if (rt->dst.flags | RTF_ICMPV6_DST)
+ net->ipv6.rt6_stats->dev_free_icmpv6++;
+ else
+ net->ipv6.rt6_stats->dev_free_uncached++;
}
}
spin_unlock_bh(&ul->lock);
@@ -237,6 +241,25 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
__ipv6_confirm_neigh(dev, daddr);
}
+static void ip6_do_account(struct dst_entry *dst)
+{
+ struct net_device *dev = dst->dev;
+ struct net *net = dev_net(dev);
+
+ if (dst->flags & RTF_ICMPV6_DST)
+ net->ipv6.rt6_stats->icmpv6_dst--;
+ if (dst->flags & RTF_ADDRCONF_DST)
+ net->ipv6.rt6_stats->addrconf_dst--;
+ if (dst->flags & RTF_FIB6_DST)
+ net->ipv6.rt6_stats->fib6_dst--;
+ if (dst->flags & RTF_UNCACHED_DST)
+ net->ipv6.rt6_stats->uncached_dst--;
+ if (dst->flags & RTF_CACHE_DST)
+ net->ipv6.rt6_stats->cache_dst--;
+ if (dst->flags & RTF_PCPU_DST)
+ net->ipv6.rt6_stats->pcpu_dst--;
+}
+
static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
.gc = ip6_dst_gc,
@@ -254,6 +277,7 @@ static struct dst_ops ip6_dst_ops_template = {
.local_out = __ip6_local_out,
.neigh_lookup = ip6_neigh_lookup,
.confirm_neigh = ip6_confirm_neigh,
+ .do_account = ip6_do_account,
};
static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
@@ -1004,6 +1028,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
struct rt6_info *pcpu_rt;
+ struct net *net = dev_net(rt->dst.dev);
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
rt->dst.dev, rt->dst.flags);
@@ -1013,6 +1038,9 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
ip6_rt_copy_init(pcpu_rt, rt);
pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
pcpu_rt->rt6i_flags |= RTF_PCPU;
+ pcpu_rt->dst.flags &= ~(RTF_CACHE_DST|RTF_ADDRCONF_DST|RTF_FIB6_DST);
+ pcpu_rt->dst.flags |= RTF_PCPU_DST;
+ net->ipv6.rt6_stats->pcpu_dst++;
return pcpu_rt;
}
@@ -1135,6 +1163,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
* No need for another dst_hold()
*/
rt6_uncached_list_add(uncached_rt);
+ net->ipv6.rt6_stats->uncached_dst++;
+ uncached_rt->dst.flags |= RTF_UNCACHED_DST;
} else {
uncached_rt = net->ipv6.ip6_null_entry;
dst_hold(&uncached_rt->dst);
@@ -1415,6 +1445,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
+ struct net *net = dev_net(nrt6->dst.dev);
+ net->ipv6.rt6_stats->cache_dst++;
+ nrt6->dst.flags |= RTF_CACHE_DST;
rt6_do_update_pmtu(nrt6, mtu);
/* ip6_ins_rt(nrt6) will bump the
@@ -1673,7 +1706,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
goto out;
}
- rt->dst.flags |= DST_HOST;
+ rt->dst.flags |= (DST_HOST|RTF_ICMPV6_DST);
+ net->ipv6.rt6_stats->icmpv6_dst++;
rt->dst.output = ip6_output;
rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
@@ -1870,6 +1904,9 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
err = -ENOMEM;
goto out;
}
+ rt->dst.flags |= RTF_FIB6_DST;
+ net->ipv6.rt6_stats->fib6_dst++;
+
if (cfg->fc_flags & RTF_EXPIRES)
rt6_set_expires(rt, jiffies +
@@ -2265,6 +2302,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
struct rd_msg *msg;
int optlen, on_link;
u8 *lladdr;
+ struct net *net;
optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
optlen -= sizeof(*msg);
@@ -2352,6 +2390,9 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_flags &= ~RTF_GATEWAY;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
+ net = dev_net(nrt->dst.dev);
+ net->ipv6.rt6_stats->cache_dst++;
+ nrt->dst.flags |= RTF_CACHE_DST;
if (ip6_ins_rt(nrt))
goto out_release;
@@ -2703,7 +2744,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
in6_dev_hold(idev);
- rt->dst.flags |= DST_HOST;
+ rt->dst.flags |= (DST_HOST|RTF_ADDRCONF_DST);
rt->dst.input = ip6_input;
rt->dst.output = ip6_output;
rt->rt6i_idev = idev;
@@ -2720,6 +2761,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_dst.plen = 128;
tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
rt->rt6i_table = fib6_get_table(net, tb_id);
+ net->ipv6.rt6_stats->addrconf_dst++;
return rt;
}
@@ -2757,6 +2799,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
+
#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
@@ -2788,13 +2831,19 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
const struct arg_dev_net *adn = arg;
const struct net_device *dev = adn->dev;
+ struct net *net = dev_net(dev);
if ((rt->dst.dev == dev || !dev) &&
rt != adn->net->ipv6.ip6_null_entry &&
(rt->rt6i_nsiblings == 0 ||
(dev && netdev_unregistering(dev)) ||
- !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+ !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) {
+ if (rt->dst.flags | RTF_ADDRCONF_DST)
+ net->ipv6.rt6_stats->dev_free_addrconf++;
+ else
+ net->ipv6.rt6_stats->dev_free_fib6++;
return -1;
+ }
return 0;
}
--
2.14.0.434.g98096fd7a8-goog