Re: [BUG] KFENCE: use-after-free read in udp_tunnel_nic_device_sync_work

From: Sam Sun

Date: Wed Jun 24 2026 - 12:37:05 EST


On Wed, Jun 24, 2026 at 11:01 PM Eric Dumazet <edumazet@xxxxxxxxxx> wrote:
>
> On Wed, Jun 24, 2026 at 7:46 AM Sam Sun <samsun1006219@xxxxxxxxx> wrote:
> >
>
> > So we are still freeing struct udp_tunnel_nic while its embedded work_struct
> > is active. debugobjects catches this at kfree() before the active work gets a
> > chance to run later and dereference the freed utn.
> >
> > My read is that the conversion from bitfields to atomic bitops removes the
> > plain bitfield data race, but UDP_TUNNEL_NIC_WORK_PENDING is still only one
> > boolean state. It can represent "some work is pending", but it cannot
> > distinguish between:
> > idle
> > queued
> > running
> > running and queued again
> >
> > In particular, the workqueue core clears WORK_STRUCT_PENDING before invoking
> > the worker. At that point the same work item can be queued again by
> > udp_tunnel_nic_device_sync(). If an already running instance later executes:
> >
> > clear_bit(UDP_TUNNEL_NIC_WORK_PENDING, &utn->flags);
> >
> > it can still clear the bit that was set for the requeued instance. Then
> > udp_tunnel_nic_unregister() may observe UDP_TUNNEL_NIC_WORK_PENDING clear and
> > free utn, even though debugobjects still sees utn->work as active.
> >
>
> -ETOOMANYBUGS
>
> Ok, we could try to convert pending bit to a refcount.
>
> diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
> index 9944ed923ddfd10f9adf6ad788c0740daeaf2adb..2e14686f35896cb0caba3f8f587ef8b369090fbf
> 100644
> --- a/net/ipv4/udp_tunnel_nic.c
> +++ b/net/ipv4/udp_tunnel_nic.c
> @@ -3,6 +3,7 @@
>
> #include <linux/ethtool_netlink.h>
> #include <linux/netdevice.h>
> +#include <linux/refcount.h>
> #include <linux/slab.h>
> #include <linux/types.h>
> #include <linux/workqueue.h>
> @@ -30,9 +31,8 @@ struct udp_tunnel_nic_table_entry {
> * @work: async work for talking to hardware from process context
> * @dev: netdev pointer
> * @lock: protects all fields
> - * @need_sync: at least one port start changed
> - * @need_replay: space was freed, we need a replay of all ports
> - * @work_pending: @work is currently scheduled
> + * @flags: sync, replay flags
> + * @refcnt: reference count
> * @n_tables: number of tables under @entries
> * @missed: bitmap of tables which overflown
> * @entries: table of tables of ports currently offloaded
> @@ -44,9 +44,11 @@ struct udp_tunnel_nic {
>
> struct mutex lock;
>
> - u8 need_sync:1;
> - u8 need_replay:1;
> - u8 work_pending:1;
> + unsigned long flags;
> +#define UDP_TUNNEL_NIC_NEED_SYNC 0
> +#define UDP_TUNNEL_NIC_NEED_REPLAY 1
> +
> + refcount_t refcnt;
>
> unsigned int n_tables;
> unsigned long missed;
> @@ -116,7 +118,7 @@ udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
> unsigned int flag)
> {
> entry->flags |= flag;
> - utn->need_sync = 1;
> + set_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
> }
>
> static void
> @@ -283,7 +285,7 @@ udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
> static void
> __udp_tunnel_nic_device_sync(struct net_device *dev, struct
> udp_tunnel_nic *utn)
> {
> - if (!utn->need_sync)
> + if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
> return;
>
> if (dev->udp_tunnel_nic_info->sync_table)
> @@ -291,21 +293,24 @@ __udp_tunnel_nic_device_sync(struct net_device
> *dev, struct udp_tunnel_nic *utn)
> else
> udp_tunnel_nic_device_sync_by_port(dev, utn);
>
> - utn->need_sync = 0;
> + clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
> /* Can't replay directly here, in case we come from the tunnel driver's
> * notification - trying to replay may deadlock inside tunnel driver.
> */
> - utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
> + if (udp_tunnel_nic_should_replay(dev, utn))
> + set_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
> + else
> + clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
> }
>
> static void
> udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
> {
> - if (!utn->need_sync)
> + if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
> return;
>
> - queue_work(udp_tunnel_nic_workqueue, &utn->work);
> - utn->work_pending = 1;
> + if (queue_work(udp_tunnel_nic_workqueue, &utn->work))
> + refcount_inc(&utn->refcnt);
> }
>
> static bool
> @@ -348,7 +353,7 @@ udp_tunnel_nic_has_collision(struct net_device
> *dev, struct udp_tunnel_nic *utn,
> if (!udp_tunnel_nic_entry_is_free(entry) &&
> entry->port == ti->port &&
> entry->type != ti->type) {
> - __set_bit(i, &utn->missed);
> + set_bit(i, &utn->missed);
> return true;
> }
> }
> @@ -483,7 +488,7 @@ udp_tunnel_nic_add_new(struct net_device *dev,
> struct udp_tunnel_nic *utn,
> * are no devices currently which have multiple tables accepting
> * the same tunnel type, and false positives are okay.
> */
> - __set_bit(i, &utn->missed);
> + set_bit(i, &utn->missed);
> }
>
> return false;
> @@ -552,7 +557,7 @@ static void __udp_tunnel_nic_reset_ntf(struct
> net_device *dev)
>
> mutex_lock(&utn->lock);
>
> - utn->need_sync = false;
> + clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
> for (i = 0; i < utn->n_tables; i++)
> for (j = 0; j < info->tables[i].n_entries; j++) {
> struct udp_tunnel_nic_table_entry *entry;
> @@ -696,8 +701,8 @@ udp_tunnel_nic_flush(struct net_device *dev,
> struct udp_tunnel_nic *utn)
> for (i = 0; i < utn->n_tables; i++)
> memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
> sizeof(**utn->entries)));
> - WARN_ON(utn->need_sync);
> - utn->need_replay = 0;
> + WARN_ON(test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags));
> + clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
> }
>
> static void
> @@ -713,8 +718,8 @@ udp_tunnel_nic_replay(struct net_device *dev,
> struct udp_tunnel_nic *utn)
> for (i = 0; i < utn->n_tables; i++)
> for (j = 0; j < info->tables[i].n_entries; j++)
> udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
> - utn->missed = 0;
> - utn->need_replay = 0;
> + bitmap_zero(&utn->missed, UDP_TUNNEL_NIC_MAX_TABLES);
> + clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
>
> if (!info->shared) {
> udp_tunnel_get_rx_info(dev);
> @@ -728,6 +733,25 @@ udp_tunnel_nic_replay(struct net_device *dev,
> struct udp_tunnel_nic *utn)
> udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
> }
>
> +static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < utn->n_tables; i++)
> + kfree(utn->entries[i]);
> +
> + if (utn->dev)
> + dev_put(utn->dev);
> +
> + kfree(utn);
> +}
> +
> +static void udp_tunnel_nic_put(struct udp_tunnel_nic *utn)
> +{
> + if (refcount_dec_and_test(&utn->refcnt))
> + udp_tunnel_nic_free(utn);
> +}
> +
> static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
> {
> struct udp_tunnel_nic *utn =
> @@ -736,14 +760,15 @@ static void
> udp_tunnel_nic_device_sync_work(struct work_struct *work)
> rtnl_lock();
> mutex_lock(&utn->lock);
>
> - utn->work_pending = 0;
> __udp_tunnel_nic_device_sync(utn->dev, utn);
>
> - if (utn->need_replay)
> + if (test_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags))
> udp_tunnel_nic_replay(utn->dev, utn);
>
> mutex_unlock(&utn->lock);
> rtnl_unlock();
> +
> + udp_tunnel_nic_put(utn);
> }
>
> static struct udp_tunnel_nic *
> @@ -759,6 +784,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
> utn->n_tables = n_tables;
> INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
> mutex_init(&utn->lock);
> + refcount_set(&utn->refcnt, 1);
>
> for (i = 0; i < n_tables; i++) {
> utn->entries[i] = kzalloc_objs(*utn->entries[i],
> @@ -776,15 +802,6 @@ udp_tunnel_nic_alloc(const struct
> udp_tunnel_nic_info *info,
> return NULL;
> }
>
> -static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
> -{
> - unsigned int i;
> -
> - for (i = 0; i < utn->n_tables; i++)
> - kfree(utn->entries[i]);
> - kfree(utn);
> -}
> -
> static int udp_tunnel_nic_register(struct net_device *dev)
> {
> const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
> @@ -863,6 +880,7 @@ static void
> udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
> {
> const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
> + bool last = true;
>
> udp_tunnel_nic_lock(dev);
>
> @@ -889,6 +907,7 @@ udp_tunnel_nic_unregister(struct net_device *dev,
> struct udp_tunnel_nic *utn)
> udp_tunnel_drop_rx_info(dev);
> utn->dev = first->dev;
> udp_tunnel_nic_unlock(dev);
> + last = false;
> goto release_dev;
> }
>
> @@ -901,16 +920,11 @@ udp_tunnel_nic_unregister(struct net_device
> *dev, struct udp_tunnel_nic *utn)
> udp_tunnel_nic_flush(dev, utn);
> udp_tunnel_nic_unlock(dev);
>
> - /* Wait for the work to be done using the state, netdev core will
> - * retry unregister until we give up our reference on this device.
> - */
> - if (utn->work_pending)
> - return;
> -
> - udp_tunnel_nic_free(utn);
> + udp_tunnel_nic_put(utn);
> release_dev:
> dev->udp_tunnel_nic = NULL;
> - dev_put(dev);
> + if (!last)
> + dev_put(dev);
> }
>
> static int

I tested the refcount version, and I could no longer reproduce the bug with
the C reproducer. I think this patch fixes the specific lifetime problem
exposed by the reproducer.

Thanks,
Yue