[PATCH 3/5] io_uring/zcrx: add shared-memory notification statistics

From: Clément Léger

Date: Wed Apr 22 2026 - 07:33:38 EST


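Add support for an optional stats struct embedded in the refill queue
region, allowing userspace to monitor copy-fallback events in real time.
The stats struct currently carries only copy-fallback counters
(copy_count and copy_bytes); no-buffers events are not counted yet.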

Userspace queries the stats struct size and alignment via
IO_URING_QUERY_ZCRX_NOTIF (notif_stats_size / notif_stats_off_alignment),
then sets ZCRX_NOTIF_DESC_FLAG_STATS in zcrx_notification_desc.flags and
provides a stats_offset pointing to a location within the refill queue
region, at or after the end of the refill queue entries.

The kernel updates the stats counters in place on every copy-fallback
event. Each update is a READ_ONCE()/WRITE_ONCE() load/store pair rather
than an atomic read-modify-write.

Signed-off-by: Clément Léger <cleger@xxxxxxxx>
---
include/uapi/linux/io_uring/query.h | 12 +++++++
include/uapi/linux/io_uring/zcrx.h | 15 +++++++--
io_uring/query.c | 14 ++++++++
io_uring/zcrx.c | 50 +++++++++++++++++++++++++++--
io_uring/zcrx.h | 1 +
5 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 95500759cc13..738c35c7d05c 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -23,6 +23,7 @@ enum {
IO_URING_QUERY_OPCODES = 0,
IO_URING_QUERY_ZCRX = 1,
IO_URING_QUERY_SCQ = 2,
+ IO_URING_QUERY_ZCRX_NOTIF = 3,

__IO_URING_QUERY_MAX,
};
@@ -62,6 +63,17 @@ struct io_uring_query_zcrx {
__u64 __resv2;
};

+struct io_uring_query_zcrx_notif {
+ /* Bitmask of supported ZCRX_NOTIF_* flags */
+ __u32 notif_flags;
+ /* Size of struct io_uring_zcrx_notif_stats, in bytes */
+ __u32 notif_stats_size;
+ /* Required alignment of the stats struct within the region (i.e. of stats_offset) */
+ __u32 notif_stats_off_alignment;
+ __u32 resv1;
+ __u64 __resv2[10];
+};
+
struct io_uring_query_scq {
/* The SQ/CQ rings header size */
__u64 hdr_size;
diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index e0c0079626c8..ae9bbca3004c 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -73,11 +73,22 @@ enum zcrx_notification_type {
ZCRX_NOTIF_COPY = 1 << 1
};

+enum zcrx_notification_desc_flags {
+ /* If set, stats_offset holds a valid offset to a struct io_uring_zcrx_notif_stats */
+ ZCRX_NOTIF_DESC_FLAG_STATS = 1 << 0,
+};
+
+struct io_uring_zcrx_notif_stats {
+ __u64 copy_count; /* cumulative copy-fallback events; NOTE(review): bumped once per successful frag copy - confirm this equals the CQE count */
+ __u64 copy_bytes; /* cumulative bytes copied by the copy fallback */
+};
+
struct zcrx_notification_desc {
__u64 user_data;
__u32 type_mask;
- __u32 __resv1;
- __u64 __resv2[10];
+ __u32 flags; /* see enum zcrx_notification_desc_flags */
+ __u64 stats_offset; /* byte offset of the stats struct from the beginning of the refill ring region; consulted only when ZCRX_NOTIF_DESC_FLAG_STATS is set */
+ __u64 __resv2[9];
};

/*
diff --git a/io_uring/query.c b/io_uring/query.c
index c1704d088374..3591106e139d 100644
--- a/io_uring/query.c
+++ b/io_uring/query.c
@@ -9,6 +9,7 @@
union io_query_data {
struct io_uring_query_opcode opcodes;
struct io_uring_query_zcrx zcrx;
+ struct io_uring_query_zcrx_notif zcrx_notif; /* payload for IO_URING_QUERY_ZCRX_NOTIF */
struct io_uring_query_scq scq;
};

@@ -44,6 +45,16 @@ static ssize_t io_query_zcrx(union io_query_data *data)
return sizeof(*e);
}

+static ssize_t io_query_zcrx_notif(union io_query_data *data)
+{ /* Fill the IO_URING_QUERY_ZCRX_NOTIF reply in data->zcrx_notif and return its size. */
+ struct io_uring_query_zcrx_notif *e = &data->zcrx_notif;
+
+ e->notif_flags = ZCRX_NOTIF_TYPE_MASK; /* the notification types registration accepts in type_mask */
+ e->notif_stats_size = sizeof(struct io_uring_zcrx_notif_stats);
+ e->notif_stats_off_alignment = __alignof__(struct io_uring_zcrx_notif_stats); /* same alignment zcrx_validate_notif_stats() enforces */
+ return sizeof(*e); /* NOTE(review): resv1 and __resv2 are not written here - confirm the caller zeroes them */
+}
+
static ssize_t io_query_scq(union io_query_data *data)
{
struct io_uring_query_scq *e = &data->scq;
@@ -83,6 +94,9 @@ static int io_handle_query_entry(union io_query_data *data, void __user *uhdr,
case IO_URING_QUERY_ZCRX:
ret = io_query_zcrx(data);
break;
+ case IO_URING_QUERY_ZCRX_NOTIF:
+ ret = io_query_zcrx_notif(data);
+ break;
case IO_URING_QUERY_SCQ:
ret = io_query_scq(data);
break;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 732e585aa13a..c61f94fb14c3 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -414,6 +414,7 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
io_free_region(ifq->user, &ifq->rq_region);
ifq->rq.ring = NULL;
ifq->rq.rqes = NULL;
+ ifq->notif_stats = NULL;
}

static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
@@ -841,6 +842,33 @@ static int zcrx_register_netdev(struct io_zcrx_ifq *ifq,
return ret;
}

+static int zcrx_validate_notif_stats(struct io_zcrx_ifq *ifq,
+ const struct io_uring_zcrx_ifq_reg *reg,
+ const struct zcrx_notification_desc *notif)
+{ /* Check notif->stats_offset against the rq region; on success point ifq->notif_stats at it, zero it and return 0. */
+ u64 stats_off = notif->stats_offset; /* keep all 64 bits: a size_t would truncate the __u64 on 32-bit */
+ u64 used, end;
+
+ used = reg->offsets.rqes + /* first byte past the refill queue entries */
+ sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries;
+
+ if (!IS_ALIGNED(stats_off, __alignof__(struct io_uring_zcrx_notif_stats)))
+ return -EINVAL; /* misaligned offset */
+ if (stats_off < used) /* stats must start at or after the end of the rqe array */
+ return -ERANGE;
+ if (check_add_overflow(stats_off,
+ sizeof(struct io_uring_zcrx_notif_stats),
+ &end))
+ return -ERANGE;
+ if (end > io_region_size(&ifq->rq_region)) /* stats must fit inside the region */
+ return -ERANGE;
+
+ ifq->notif_stats = io_region_get_ptr(&ifq->rq_region) + stats_off;
+ memset(ifq->notif_stats, 0, sizeof(*ifq->notif_stats)); /* counters start from zero */
+
+ return 0;
+}
+
int io_register_zcrx(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg)
{
@@ -894,7 +922,9 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
return -EFAULT;
if (notif.type_mask & ~ZCRX_NOTIF_TYPE_MASK)
return -EINVAL;
- if (notif.__resv1 || !mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
+ if (notif.flags & ~ZCRX_NOTIF_DESC_FLAG_STATS)
+ return -EINVAL;
+ if (!mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
return -EINVAL;

ifq = io_zcrx_ifq_alloc(ctx);
@@ -925,6 +955,12 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
if (ret)
goto err;

+ if (notif.flags & ZCRX_NOTIF_DESC_FLAG_STATS) {
+ ret = zcrx_validate_notif_stats(ifq, &reg, &notif);
+ if (ret)
+ goto err;
+ }
+
ifq->kern_readable = !(area.flags & IORING_ZCRX_AREA_DMABUF);

if (!(reg.flags & ZCRX_REG_NODEV)) {
@@ -1133,6 +1169,11 @@ static void zcrx_notif_tw(struct io_tw_req tw_req, io_tw_token_t tw)
kfree_rcu(req, rcu_head);
}

+static void zcrx_stat_add(__u64 *p, s64 v)
+{ /* Add v to the counter at *p. */
+ WRITE_ONCE(*p, READ_ONCE(*p) + v); /* NOTE(review): separate load and store, not an atomic RMW - confirm updaters are serialized */
+}
+
static void zcrx_send_notif(struct io_zcrx_ifq *ifq, u32 type_mask)
{
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO;
@@ -1513,8 +1554,13 @@ static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
int ret;

ret = io_zcrx_copy_chunk(req, ifq, page, off + skb_frag_off(frag), len);
- if (ret > 0)
+ if (ret > 0) {
+ if (ifq->notif_stats) {
+ zcrx_stat_add(&ifq->notif_stats->copy_count, 1);
+ zcrx_stat_add(&ifq->notif_stats->copy_bytes, ret);
+ }
zcrx_send_notif(ifq, ZCRX_NOTIF_COPY);
+ }

return ret;
}
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 1bd63adaa711..0dcf486ff530 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -82,6 +82,7 @@ struct io_zcrx_ifq {
u32 allowed_notif_mask;
u32 fired_notifs;
u64 notif_data;
+ struct io_uring_zcrx_notif_stats *notif_stats;
};

#if defined(CONFIG_IO_URING_ZCRX)
--
2.52.0