[PATCH v2] Wireguard: Fix data-race in rx/tx counter
From: Rafael Passos
Date: Mon Jun 29 2026 - 09:03:36 EST
Fix a data race in the {rx,tx}_bytes counters of a wireguard peer.
These values were incremented inside a read_lock_bh block, without
exclusive write protection.
Using per-cpu counters guarantees consistency and moves the overhead to
the read side, which is the least frequent operation.
Signed-off-by: Rafael Passos <rafael@xxxxxxxxxxx>
---
As we discussed in the thread, keeping these counters accurate might not
be worth the extra memory cost of per-cpu counters.
It was great reading and learning about it. I think this would have been
a good patch :)
When looking around drivers/net, I found a few users of u64_stats that do
not use per-cpu counters. I will do some digging to understand the reasoning
behind that, compared to the original "bare u64 counters" in wireguard.
Thanks,
Rafael Passos
drivers/net/wireguard/netlink.c | 22 ++++++++++++++++++++--
drivers/net/wireguard/peer.c | 7 ++++++-
drivers/net/wireguard/peer.h | 7 ++++++-
drivers/net/wireguard/receive.c | 9 ++++++++-
drivers/net/wireguard/socket.c | 9 +++++++--
5 files changed, 47 insertions(+), 7 deletions(-)
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
index 1da7e98d0d50..f5978a8c2ca3 100644
--- a/drivers/net/wireguard/netlink.c
+++ b/drivers/net/wireguard/netlink.c
@@ -105,13 +105,31 @@ get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
if (fail)
goto err;
+ // read per-cpu counters for peer rx/tx_bytes
+ u64 total_rx_bytes = 0, total_tx_bytes = 0;
+ u64 rx_bytes = 0, tx_bytes = 0;
+ struct wg_peer_stats *pcpu_ptr;
+ unsigned int cpu, start;
+
+ for_each_possible_cpu(cpu) {
+ pcpu_ptr = per_cpu_ptr(peer->pcpu_stats, cpu);
+ do {
+ start = u64_stats_fetch_begin(&pcpu_ptr->syncp);
+ rx_bytes = u64_stats_read(&pcpu_ptr->rx_bytes);
+ tx_bytes = u64_stats_read(&pcpu_ptr->tx_bytes);
+ } while (u64_stats_fetch_retry(&pcpu_ptr->syncp, start));
+
+ total_rx_bytes += rx_bytes;
+ total_tx_bytes += tx_bytes;
+ }
+
if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
sizeof(last_handshake), &last_handshake) ||
nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
peer->persistent_keepalive_interval) ||
- nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
+ nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, total_tx_bytes,
WGPEER_A_UNSPEC) ||
- nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
+ nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, total_rx_bytes,
WGPEER_A_UNSPEC) ||
nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
goto err;
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
index 1cb502a932e0..a37aa31f132a 100644
--- a/drivers/net/wireguard/peer.c
+++ b/drivers/net/wireguard/peer.c
@@ -36,6 +36,10 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)))
goto err;
+ peer->pcpu_stats = alloc_percpu(struct wg_peer_stats);
+ if (unlikely(!peer->pcpu_stats))
+ goto err;
+
peer->device = wg;
wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
public_key, preshared_key, peer);
@@ -189,7 +193,8 @@ static void rcu_release(struct rcu_head *rcu)
dst_cache_destroy(&peer->endpoint_cache);
WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue));
-
+
+ free_percpu(peer->pcpu_stats);
/* The final zeroing takes care of clearing any remaining handshake key
* material and other potentially sensitive information.
*/
diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h
index 718fb42bdac7..e01781724aa1 100644
--- a/drivers/net/wireguard/peer.h
+++ b/drivers/net/wireguard/peer.h
@@ -34,6 +34,11 @@ struct endpoint {
};
};
+struct wg_peer_stats {
+ u64_stats_t rx_bytes, tx_bytes;
+ struct u64_stats_sync syncp;
+};
+
struct wg_peer {
struct wg_device *device;
struct prev_queue tx_queue, rx_queue;
@@ -49,7 +54,7 @@ struct wg_peer {
struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work;
struct cookie latest_cookie;
struct hlist_node pubkey_hash;
- u64 rx_bytes, tx_bytes;
+ struct wg_peer_stats __percpu *pcpu_stats;
struct timer_list timer_retransmit_handshake, timer_send_keepalive;
struct timer_list timer_new_handshake, timer_zero_key_material;
struct timer_list timer_persistent_keepalive;
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index eb8851113654..d799370122b5 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -19,8 +19,15 @@
/* Must be called with bh disabled. */
static void update_rx_stats(struct wg_peer *peer, size_t len)
{
+ struct wg_peer_stats *pcpu_ptr;
+
dev_sw_netstats_rx_add(peer->device->dev, len);
- peer->rx_bytes += len;
+
+ pcpu_ptr = this_cpu_ptr(peer->pcpu_stats);
+
+ u64_stats_update_begin(&pcpu_ptr->syncp);
+ u64_stats_add(&pcpu_ptr->rx_bytes, len);
+ u64_stats_update_end(&pcpu_ptr->syncp);
}
#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index 0028ef17dc71..685ae6a0fb2c 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -166,6 +166,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
{
+ struct wg_peer_stats *pcpu_ptr;
size_t skb_len = skb->len;
int ret = -EAFNOSUPPORT;
@@ -178,8 +179,12 @@ int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
&peer->endpoint_cache);
else
dev_kfree_skb(skb);
- if (likely(!ret))
- peer->tx_bytes += skb_len;
+ if (likely(!ret)) {
+ pcpu_ptr = this_cpu_ptr(peer->pcpu_stats);
+ u64_stats_update_begin(&pcpu_ptr->syncp);
+ u64_stats_add(&pcpu_ptr->tx_bytes, skb_len);
+ u64_stats_update_end(&pcpu_ptr->syncp);
+ }
read_unlock_bh(&peer->endpoint_lock);
return ret;
--
2.53.0