Re: [PATCH v3 6/7] seccomp: allow nested listeners

From: Andy Lutomirski

Date: Fri Dec 12 2025 - 08:57:43 EST


On Thu, Dec 11, 2025 at 8:47 PM Alexander Mikhalitsyn
<aleksandr.mikhalitsyn@xxxxxxxxxxxxx> wrote:
>
> Now everything is ready to get rid of "only one listener per tree"
> limitation.
>
> Let's introduce a new uAPI flag
> SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS, so userspace may explicitly
> allow nested listeners when installing a listener.
>
> Note, that to install n-th listener, this flag must be set on all
> the listeners up the tree.


> diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
> index cff0fa7f3175..b9633ab1ed47 100644
> --- a/Documentation/userspace-api/seccomp_filter.rst
> +++ b/Documentation/userspace-api/seccomp_filter.rst
> @@ -210,6 +210,12 @@ notifications from both tasks will appear on the same filter fd. Reads and
> writes to/from a filter fd are also synchronized, so a filter fd can safely
> have many readers.
>
> +By default, only one listener within seccomp filters tree is allowed. On attempt
> +to add a new listener when one already exists in the filter tree, the
> +``seccomp()`` call will fail with ``-EBUSY``. To allow multiple listeners, the
> +``SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS`` flag can be passed in addition to
> +the ``SECCOMP_FILTER_FLAG_NEW_LISTENER`` flag.
> +

When I read this, I wondered: does this flag permit additional filters
(added later, nested inside) to have listeners, or does it permit
installing a listener when there already is one? I thought it was
surely the former, but I had to read the code to confirm that.

Maybe clarify the text?

(Yes, I realize it's also in the commit message, but that's not a
great place to hide this info.)


> The interface for a seccomp notification fd consists of two structures:
>
> .. code-block:: c
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index 9b959972bf4a..9b060946019d 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -10,7 +10,8 @@
> SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
> SECCOMP_FILTER_FLAG_NEW_LISTENER | \
> SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \
> - SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
> + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV | \
> + SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS)
>
> /* sizeof() the first published struct seccomp_notif_addfd */
> #define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index dbfc9b37fcae..de78d8e7a70b 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -18,13 +18,14 @@
> #define SECCOMP_GET_NOTIF_SIZES 3
>
> /* Valid flags for SECCOMP_SET_MODE_FILTER */
> -#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
> -#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
> -#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
> -#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
> -#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
> +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
> +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
> +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
> +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
> +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
> /* Received notifications wait in killable state (only respond to fatal signals) */
> -#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
> +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
> +#define SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS (1UL << 6)
>
> /*
> * All BPF programs must return a 32-bit value.
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 51d0d8adaffb..7667f443ff6c 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -206,6 +206,7 @@ static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
> * @wait_killable_recv: Put notifying process in killable state once the
> * notification is received by the userspace listener.
> * @first_listener: true if this is the first seccomp listener installed in the tree.
> + * @allow_nested_listeners: Allow nested seccomp listeners.
> * @prev: points to a previously installed, or inherited, filter
> * @prog: the BPF program to evaluate
> * @notif: the struct that holds all notification related information
> @@ -228,6 +229,7 @@ struct seccomp_filter {
> bool log : 1;
> bool wait_killable_recv : 1;
> bool first_listener : 1;
> + bool allow_nested_listeners : 1;
> struct action_cache cache;
> struct seccomp_filter *prev;
> struct bpf_prog *prog;
> @@ -956,6 +958,10 @@ static long seccomp_attach_filter(unsigned int flags,
> if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
> filter->wait_killable_recv = true;
>
> + /* Set nested listeners allow flag, if present. */
> + if (flags & SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS)
> + filter->allow_nested_listeners = true;
> +
> /*
> * If there is an existing filter, make it the prev and don't drop its
> * task reference.
> @@ -1997,7 +2003,8 @@ static struct file *init_listener(struct seccomp_filter *filter)
> }
>
> /*
> - * Does @new_child have a listener while an ancestor also has a listener?
> + * Does @new_child have a listener while an ancestor also has a listener
> + * and hasn't allowed nesting?
> * If so, we'll want to reject this filter.
> * This only has to be tested for the current process, even in the TSYNC case,
> * because TSYNC installs @child with the same parent on all threads.
> @@ -2015,7 +2022,12 @@ static bool check_duplicate_listener(struct seccomp_filter *new_child)
> return false;
> for (cur = current->seccomp.filter; cur; cur = cur->prev) {
> if (!IS_ERR_OR_NULL(cur->notif))
> - return true;
> + /*
> + * We don't need to go up further, because if there is a
> + * listener with nesting allowed, then all the listeners
> + * up the tree have allowed nesting as well.
> + */
> + return !cur->allow_nested_listeners;
> }
>
> /* Mark first listener in the tree. */
> @@ -2062,10 +2074,12 @@ static long seccomp_set_mode_filter(unsigned int flags,
> return -EINVAL;
>
> /*
> - * The SECCOMP_FILTER_FLAG_WAIT_KILLABLE_SENT flag doesn't make sense
> + * The SECCOMP_FILTER_FLAG_WAIT_KILLABLE_SENT and
> + * SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS flags don't make sense
> * without the SECCOMP_FILTER_FLAG_NEW_LISTENER flag.
> */
> - if ((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) &&
> + if (((flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) ||
> + (flags & SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS)) &&
> ((flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) == 0))
> return -EINVAL;
>
> diff --git a/tools/include/uapi/linux/seccomp.h b/tools/include/uapi/linux/seccomp.h
> index dbfc9b37fcae..de78d8e7a70b 100644
> --- a/tools/include/uapi/linux/seccomp.h
> +++ b/tools/include/uapi/linux/seccomp.h
> @@ -18,13 +18,14 @@
> #define SECCOMP_GET_NOTIF_SIZES 3
>
> /* Valid flags for SECCOMP_SET_MODE_FILTER */
> -#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
> -#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
> -#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
> -#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
> -#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
> +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
> +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
> +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
> +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
> +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
> /* Received notifications wait in killable state (only respond to fatal signals) */
> -#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
> +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
> +#define SECCOMP_FILTER_FLAG_ALLOW_NESTED_LISTENERS (1UL << 6)
>
> /*
> * All BPF programs must return a 32-bit value.
> --
> 2.43.0
>


--
Andy Lutomirski
AMA Capital Management, LLC