[RFC PATCH 1/2] aoe: add reference count in aoeif for tracking net_device usage

From: Chun-Yi Lee
Date: Wed Oct 02 2024 - 00:06:53 EST


This is a debugging patch. To track the net_device references taken
through an aoeif, it adds an nd_pcpu_refcnt field to the aoeif
structure. Two wrappers, nd_dev_hold() and nd_dev_put(), call
dev_hold(nd)/dev_put(nd) and update ifp->nd_pcpu_refcnt at the
same time.

Defining DEBUG at the top of aoe.h enables the tracking.
The value of nd_pcpu_refcnt is then printed in debugfs:

rttavg: 249029 rttdev: 1781043
nskbpool: 0
kicked: 0
maxbcnt: 1024
ref: 0
falloc: 36
ffree: 0000000013c0033f
52540054c48e:0:16:16
ssthresh:8
taint:0
r:1270
w:8
enp1s0:1 <-- aoeif->nd_pcpu_refcnt is printed after nd->name

The value of aoeif->nd_pcpu_refcnt will also be printed when 'rmmod aoe':

[23412.255237][ T2857] aoe: enp1s0->refcnt: 32, aoeif->nd_refcnt: 0

Kernel dynamic debug can be used to print a more detailed log, at the
cost of extra overhead:

echo -n 'file drivers/block/aoe/* +p' > /sys/kernel/debug/dynamic_debug/control

[ 6961.938642] aoe: tx dev_put enp1s0->refcnt: 31, aoeif->nd_refcnt: 1
[ 7023.368814] aoe: aoecmd_cfg_pkts dev_hold lo->refcnt: 30
[ 7023.370530] aoe: aoecmd_cfg_pkts dev_hold enp1s0->refcnt: 32, aoeif->nd_refcnt: 2
[ 7023.372977] aoe: tx dev_put lo->refcnt: 29
[ 7023.375147] aoe: tx dev_put enp1s0->refcnt: 31, aoeif->nd_refcnt: 1

Normally, after an aoe operation completes, aoeif->nd_refcnt should
read '1', which means that the dev_hold(nd)/dev_put(nd) calls are
balanced. The final reference on the net_device is dropped when the
aoe module is removed (rmmod aoe).

Signed-off-by: Chun-Yi Lee <jlee@xxxxxxxx>
---
drivers/block/aoe/aoe.h | 84 ++++++++++++++++++++++++++++++++++++++
drivers/block/aoe/aoeblk.c | 5 +++
drivers/block/aoe/aoedev.c | 20 +++++++++
3 files changed, 109 insertions(+)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 749ae1246f4c..a6d954562794 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,6 @@
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
#include <linux/blk-mq.h>
+#include <linux/netdevice.h>

#define VERSION "85"
#define AOE_MAJOR 152
@@ -133,6 +134,9 @@ struct aoeif {
struct net_device *nd;
ulong lost;
int bcnt;
+#ifdef DEBUG
+ int __percpu *nd_pcpu_refcnt;
+#endif
};

struct aoetgt {
@@ -238,6 +242,7 @@ void aoedev_downdev(struct aoedev *d);
int aoedev_flush(const char __user *str, size_t size);
void aoe_failbuf(struct aoedev *, struct buf *);
void aoedev_put(struct aoedev *);
+struct aoeif *get_aoeif(struct net_device *nd);

int aoenet_init(void);
void aoenet_exit(void);
@@ -246,3 +251,82 @@ int is_aoe_netif(struct net_device *ifp);
int set_aoe_iflist(const char __user *str, size_t size);

extern struct workqueue_struct *aoe_wq;
+
+#ifdef DEBUG
+/*
+ * Return the net_device reference tally kept in an aoeif: the sum of
+ * the ifp->nd_pcpu_refcnt counter over every possible CPU.
+ */
+static inline int aoeif_nd_refcnt_read(const struct aoeif *ifp)
+{
+	int cpu;
+	int total = 0;
+
+	for_each_possible_cpu(cpu) {
+		total += *per_cpu_ptr(ifp->nd_pcpu_refcnt, cpu);
+	}
+
+	return total;
+}
+
+/*
+ * Report and release the net_device reference tracking of an aoeif.
+ *
+ * Logs the tracked count (together with the net_device's own refcount
+ * when ifp->nd is set), frees the per-CPU counter and clears the
+ * pointer. A NULL ifp, or an ifp whose counter is not allocated (for
+ * example because an earlier call already freed it), is ignored.
+ */
+static inline void aoeif_nd_refcnt_free(struct aoeif *ifp)
+{
+	if (!ifp || !ifp->nd_pcpu_refcnt)
+		return;
+
+	if (ifp->nd)
+		pr_info("aoe: %s->refcnt: %d, aoeif->nd_refcnt: %d\n",
+			ifp->nd->name, netdev_refcnt_read(ifp->nd),
+			aoeif_nd_refcnt_read(ifp));
+	else
+		pr_info("aoe: aoeif->nd_refcnt: %d\n", aoeif_nd_refcnt_read(ifp));
+
+	/* No need to zero the counters first: the memory is released here. */
+	free_percpu(ifp->nd_pcpu_refcnt);
+	ifp->nd_pcpu_refcnt = NULL;
+}
+
+/*
+ * Take a reference on a net_device and account for it in its aoeif.
+ *
+ * @str: caller name, printed as a prefix of the debug message
+ * @nd:  net_device to hold; NULL is ignored
+ * @ifi: aoeif that nd is set in, or will be set in later; when NULL the
+ *       aoeif is looked up with get_aoeif(nd)
+ *
+ * When no aoeif with an allocated counter is available, only the
+ * net_device reference is taken and logged.
+ */
+static inline void __nd_dev_hold(const char *str, struct net_device *nd, struct aoeif *ifi)
+{
+	struct aoeif *ifp;
+
+	if (!nd)
+		return;
+	dev_hold(nd);
+	ifp = ifi ? ifi : get_aoeif(nd);
+	if (ifp && ifp->nd_pcpu_refcnt) {
+		this_cpu_inc(*ifp->nd_pcpu_refcnt);
+		pr_debug("aoe: %s dev_hold %s->refcnt: %d, aoeif->nd_refcnt: %d\n",
+			 str, nd->name, netdev_refcnt_read(nd),
+			 aoeif_nd_refcnt_read(ifp));
+	} else {
+		pr_debug("aoe: %s dev_hold %s->refcnt: %d\n",
+			 str, nd->name, netdev_refcnt_read(nd));
+	}
+}
+/* Records the calling function's name for the debug message. */
+#define nd_dev_hold(nd, ifi) __nd_dev_hold(__func__, (nd), (ifi))
+
+/*
+ * Drop a reference on a net_device and account for it in its aoeif.
+ *
+ * @str: caller name, printed as a prefix of the debug message
+ * @nd:  net_device to put; NULL is ignored
+ * @ifi: aoeif that nd is set in; when NULL the aoeif is looked up with
+ *       get_aoeif(nd)
+ *
+ * When no aoeif with an allocated counter is available, only the
+ * net_device reference is dropped and logged.
+ */
+static inline void __nd_dev_put(const char *str, struct net_device *nd, struct aoeif *ifi)
+{
+	struct aoeif *ifp;
+
+	if (!nd)
+		return;
+	ifp = ifi ? ifi : get_aoeif(nd);
+	/*
+	 * Do the accounting and logging while the caller's reference is
+	 * still held: nd is not touched any more once dev_put() has run.
+	 * The logged net_device count is therefore the current value minus
+	 * the one reference that is dropped below.
+	 */
+	if (ifp && ifp->nd_pcpu_refcnt) {
+		this_cpu_dec(*ifp->nd_pcpu_refcnt);
+		pr_debug("aoe: %s dev_put %s->refcnt: %d, aoeif->nd_refcnt: %d\n",
+			 str, nd->name, netdev_refcnt_read(nd) - 1,
+			 aoeif_nd_refcnt_read(ifp));
+	} else {
+		pr_debug("aoe: %s dev_put %s->refcnt: %d\n",
+			 str, nd->name, netdev_refcnt_read(nd) - 1);
+	}
+	dev_put(nd);
+}
+/* Records the calling function's name for the debug message. */
+#define nd_dev_put(nd, ifi) __nd_dev_put(__func__, (nd), (ifi))
+#else
+/*
+ * Without DEBUG no tracking is done: the wrappers only drop or take the
+ * net_device reference and ignore ifi.
+ */
+static inline void nd_dev_put(struct net_device *nd, struct aoeif *ifi)
+{
+	dev_put(nd);
+}
+static inline void nd_dev_hold(struct net_device *nd, struct aoeif *ifi)
+{
+	dev_hold(nd);
+}
+/* Nothing to report or free when tracking is compiled out. */
+static inline void aoeif_nd_refcnt_free(const struct aoeif *ifp) {}
+#endif // DEBUG
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 2028795ec61c..19d62ccca1e9 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -142,7 +142,12 @@ static int aoe_debugfs_show(struct seq_file *s, void *ignored)
ifp = (*t)->ifs;
ife = ifp + ARRAY_SIZE((*t)->ifs);
for (; ifp->nd && ifp < ife; ifp++) {
+#ifdef DEBUG
+ seq_printf(s, "%c%s:%d", c, ifp->nd->name,
+ aoeif_nd_refcnt_read(ifp));
+#else
seq_printf(s, "%c%s", c, ifp->nd->name);
+#endif
c = ',';
}
seq_puts(s, "\n");
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 3523dd82d7a0..9781488b286b 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -529,3 +529,23 @@ aoedev_init(void)
{
return 0;
}
+
+/*
+ * Find the aoeif whose nd member is the given net_device.
+ *
+ * Walks every device on devlist, each device's targets (stopping at the
+ * first empty target slot) and all NAOEIFS interface slots of a target.
+ * Returns the first matching aoeif, or NULL when there is none (a NULL
+ * nd never matches).
+ *
+ * NOTE(review): no lock is taken here while walking devlist; confirm
+ * what callers must hold.
+ */
+struct aoeif *
+get_aoeif(struct net_device *nd)
+{
+	struct aoedev *dev;
+
+	for (dev = devlist; dev; dev = dev->next) {
+		struct aoetgt **slot = dev->targets;
+		struct aoetgt **end = slot + dev->ntargets;
+
+		while (slot < end && *slot) {
+			struct aoetgt *tgt = *slot++;
+			int i;
+
+			for (i = 0; i < NAOEIFS; i++) {
+				struct aoeif *cand = &tgt->ifs[i];
+
+				if (cand->nd && cand->nd == nd)
+					return cand;
+			}
+		}
+	}
+	return NULL;
+}
--
2.35.3