Re: [PATCH 2/2] io_uring/bpf_filter: pass in expected filter payload size

From: Christian Brauner

Date: Tue Feb 17 2026 - 08:38:43 EST


On Wed, Feb 11, 2026 at 08:01:18AM -0700, Jens Axboe wrote:
> It's quite possible that the payloads attached to some opcodes,
> like IORING_OP_OPENAT/OPENAT2 or IORING_OP_SOCKET,
> can change over time. For example, on the openat/openat2 side, the
> struct open_how argument is extensible, and could be extended in the
> future to allow further arguments to be passed in.
>
> Allow registration of a cBPF filter to give the size of the filter as
> seen by userspace. If that filter is for an opcode that takes extra
> payload data, allow it if the application payload expectation is the
> same size as the kernel's. If that is the case, the kernel supports
> filtering on the payload that the application expects. If the size
> differs, the behavior depends on the IO_URING_BPF_FILTER_SZ_STRICT flag:
>
> 1) If IO_URING_BPF_FILTER_SZ_STRICT is set and the size expectation
> differs, fail the attempt to load the filter.
>
> 2) If IO_URING_BPF_FILTER_SZ_STRICT isn't set, allow the filter if
> the userspace pdu size is smaller than what the kernel offers.
>
> 3) Regardless of whether IO_URING_BPF_FILTER_SZ_STRICT is set, fail
> loading the filter if the userspace pdu size is bigger than what the
> kernel supports.
>
> An attempt to load a filter that fails due to sizing will error with
> -EMSGSIZE.
> For that error, the registration struct will have filter->pdu_size
> populated with the pdu size that the kernel uses.
>
> Reported-by: Christian Brauner <brauner@xxxxxxxxxx>
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
> include/uapi/linux/io_uring/bpf_filter.h | 8 ++-
> io_uring/bpf_filter.c | 65 ++++++++++++++++++------
> 2 files changed, 56 insertions(+), 17 deletions(-)
>
> diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
> index 220351b81bc0..1b461d792a7b 100644
> --- a/include/uapi/linux/io_uring/bpf_filter.h
> +++ b/include/uapi/linux/io_uring/bpf_filter.h
> @@ -35,13 +35,19 @@ enum {
> * If set, any currently unset opcode will have a deny filter attached
> */
> IO_URING_BPF_FILTER_DENY_REST = 1,
> + /*
> + * If set, if kernel and application don't agree on pdu_size for
> + * the given opcode, fail the registration of the filter.
> + */
> + IO_URING_BPF_FILTER_SZ_STRICT = 2,
> };
>
> struct io_uring_bpf_filter {
> __u32 opcode; /* io_uring opcode to filter */
> __u32 flags;
> __u32 filter_len; /* number of BPF instructions */
> - __u32 resv;
> + __u8 pdu_size; /* expected pdu size for opcode */
> + __u8 resv[3];
> __u64 filter_ptr; /* pointer to BPF filter */
> __u64 resv2[5];
> };

Do you want this to be fixed-size?
You could use copy_struct_from_user() and copy_struct_to_user() instead,
and then you can grow the struct on demand without having to reserve
space, i.e. the model used by struct clone_args, struct mount_attr, etc.

> diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c
> index 8ac7d06de122..4e1dd955c8c4 100644
> --- a/io_uring/bpf_filter.c
> +++ b/io_uring/bpf_filter.c
> @@ -308,36 +308,69 @@ static struct io_bpf_filters *io_bpf_filter_cow(struct io_restriction *src)
> return ERR_PTR(-EBUSY);
> }
>
> -#define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST
> +#define IO_URING_BPF_FILTER_FLAGS (IO_URING_BPF_FILTER_DENY_REST | \
> + IO_URING_BPF_FILTER_SZ_STRICT)
>
> -int io_register_bpf_filter(struct io_restriction *res,
> - struct io_uring_bpf __user *arg)
> +static int io_bpf_filter_import(struct io_uring_bpf *reg,
> + struct io_uring_bpf __user *arg)
> {
> - struct io_bpf_filters *filters, *old_filters = NULL;
> - struct io_bpf_filter *filter, *old_filter;
> - struct io_uring_bpf reg;
> - struct bpf_prog *prog;
> - struct sock_fprog fprog;
> + const struct io_issue_def *def;
> int ret;
>
> - if (copy_from_user(&reg, arg, sizeof(reg)))
> + if (copy_from_user(reg, arg, sizeof(*reg)))
> return -EFAULT;
> - if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
> + if (reg->cmd_type != IO_URING_BPF_CMD_FILTER)
> return -EINVAL;
> - if (reg.cmd_flags || reg.resv)
> + if (reg->cmd_flags || reg->resv)
> return -EINVAL;
>
> - if (reg.filter.opcode >= IORING_OP_LAST)
> + if (reg->filter.opcode >= IORING_OP_LAST)
> return -EINVAL;
> - if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
> + if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
> return -EINVAL;
> - if (reg.filter.resv)
> + if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv)))
> return -EINVAL;
> - if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
> + if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2)))
> return -EINVAL;
> - if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
> + if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS)
> return -EINVAL;
>
> + /* Verify filter size */
> + def = &io_issue_defs[reg->filter.opcode];
> +
> + /* same size, always ok */
> + ret = 0;
> + if (reg->filter.pdu_size == def->filter_pdu_size)
> + ;

Odd way of writing this if-else ladder :)


> + /* size differs, fail in strict mode */
> + else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT)
> + ret = -EMSGSIZE;
> + /* userspace filter is bigger, always disallow */
> + else if (reg->filter.pdu_size > def->filter_pdu_size)
> + ret = -EMSGSIZE;
> +
> + /* copy back kernel filter size */
> + reg->filter.pdu_size = def->filter_pdu_size;
> + if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter)))
> + return -EFAULT;
> +
> + return ret;
> +}
> +
> +int io_register_bpf_filter(struct io_restriction *res,
> + struct io_uring_bpf __user *arg)
> +{
> + struct io_bpf_filters *filters, *old_filters = NULL;
> + struct io_bpf_filter *filter, *old_filter;
> + struct io_uring_bpf reg;
> + struct bpf_prog *prog;
> + struct sock_fprog fprog;
> + int ret;
> +
> + ret = io_bpf_filter_import(&reg, arg);
> + if (ret)
> + return ret;
> +
> fprog.len = reg.filter.filter_len;
> fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);
>
> --
> 2.51.0
>