Re: Soft lock-ups caused by iptables
From: Florian Westphal
Date: Thu Nov 20 2025 - 15:46:36 EST
Hamza Mahfooz <hamzamahfooz@xxxxxxxxxxxxxxxxxxx> wrote:
> On Thu, Nov 20, 2025 at 10:34:46AM +0100, Florian Westphal wrote:
> > netfilter: nf_tables: avoid chain re-validation if possible
> >
> > Consider:
> >
> > input -> j2 -> j3
> > input -> j2 -> j3
> > input -> j1 -> j2 -> j3
> >
> > Then the second rule does not need to revalidate j2, and, by extension j3.
> >
> > We need to validate it only for rule 3.
> >
> > This is needed because chain loop detection also ensures we do not
> > exceed the jump stack: Just because we know that j2 is cycle free, its
> > last jump might now exceed the allowed stack. We also need to update
> > the new largest call depth for all the reachable nodes.
> >
> > diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> > --- a/include/net/netfilter/nf_tables.h
> > +++ b/include/net/netfilter/nf_tables.h
> > @@ -1109,6 +1109,7 @@ struct nft_rule_blob {
> > * @udlen: user data length
> > * @udata: user data in the chain
> > * @blob_next: rule blob pointer to the next in the chain
> > + * @depth: chain was validated for call level <= depth
> > */
> > struct nft_chain {
> > struct nft_rule_blob __rcu *blob_gen_0;
> > @@ -1128,9 +1129,10 @@ struct nft_chain {
> >
> > /* Only used during control plane commit phase: */
> > struct nft_rule_blob *blob_next;
> > + u8 depth;
> > };
> >
> > -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
> > +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
> > int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
> > const struct nft_set_iter *iter,
> > struct nft_elem_priv *elem_priv);
> > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> > --- a/net/netfilter/nf_tables_api.c
> > +++ b/net/netfilter/nf_tables_api.c
> > @@ -4088,15 +4088,26 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
> > * and set lookups until either the jump limit is hit or all reachable
> > * chains have been validated.
> > */
> > -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
> > +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
> > {
> > struct nft_expr *expr, *last;
> > struct nft_rule *rule;
> > int err;
> >
> > + BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
> > if (ctx->level == NFT_JUMP_STACK_SIZE)
> > return -EMLINK;
> >
> > + /* jumps to base chains are not allowed, this is already
> > + * validated by nft_verdict_init().
> > + *
> > + * Chain must be re-validated if we are entering for first
> > + * time or if the current jumpstack usage is higher than on
> > + * previous check.
> > + */
> > + if (ctx->level && chain->depth >= ctx->level)
> > + return 0;
> > +
> > list_for_each_entry(rule, &chain->rules, list) {
> > if (fatal_signal_pending(current))
> > return -EINTR;
> > @@ -4117,6 +4128,10 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
> > }
> > }
> >
> > + /* Chain needs no re-validation if called again
> > + * from a path that doesn't exceed level.
> > + */
> > + chain->depth = ctx->level;
> > return 0;
> > }
> > EXPORT_SYMBOL_GPL(nft_chain_validate);
> > @@ -4128,7 +4143,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
> > .net = net,
> > .family = table->family,
> > };
> > - int err;
> > + int err = 0;
> >
> > list_for_each_entry(chain, &table->chains, list) {
> > if (!nft_is_base_chain(chain))
> > @@ -4137,12 +4152,16 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
> > ctx.chain = chain;
> > err = nft_chain_validate(&ctx, chain);
> > if (err < 0)
> > - return err;
> > + goto err;
> >
> > cond_resched();
> > }
> >
> > - return 0;
> > +err:
> > + list_for_each_entry(chain, &table->chains, list)
> > + chain->depth = 0;
> > +
> > + return err;
> > }
> >
> > int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
>
> FWIW This patch seems to resolve the issue, assuming you intended to
> include the following:
Thanks for testing. I will try to make this work universally next week
(this needs more work to keep a bitmask of base hook types for
which we already validated this). And we likely need to improve
existing test coverage, the above patch should fail the tests we have.