[PATCH] Periodically expire flows from flow offload tables
From: Michael Lilja
Date: Sun Oct 23 2022 - 13:17:35 EST
When a flow is added to a flow table for SW/HW offload, the user
has no means of controlling the flow once it has been offloaded.
If a number of firewall rules have been made using time schedules,
then these rules don't apply to the already offloaded flows.
Adding new firewall rules also doesn't affect already offloaded
flows.
This patch handles flow table retirement, giving the user the option
to at least periodically get the flow back under the control of the
firewall rules, so already offloaded flows can be dropped or be
pushed back to the flow offload tables.
Flow retirement is disabled by default and can be set in seconds
using sysctl -w net.netfilter.nf_flowtable_retire=<seconds>
Signed-off-by: Michael Lilja <michael.lilja@xxxxxxxxx>
---
.../networking/nf_conntrack-sysctl.rst | 7 ++++++
include/net/netfilter/nf_flow_table.h | 1 +
include/net/netns/conntrack.h | 3 +++
net/netfilter/nf_conntrack_standalone.c | 17 ++++++++++++++
net/netfilter/nf_flow_table_core.c | 23 +++++++++++++++----
5 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 1120d71f28d7..ab4071bc64c1 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -201,3 +201,10 @@ nf_flowtable_udp_timeout - INTEGER (seconds)
Control offload timeout for udp connections.
UDP connections may be offloaded from nf conntrack to nf flow table.
Once aged, the connection is returned to nf conntrack with udp pickup timeout.
+
+nf_flowtable_retire - INTEGER (seconds)
+ - 0 - disabled (default)
+ - not 0 - enabled; the value is the number of seconds a flow stays offloaded
+
+ If this option is enabled, offloaded flows retire periodically and return
+ control of the flow to conntrack/netfilter.
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index cd982f4a0f50..f5643c24fb55 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -177,6 +177,7 @@ struct flow_offload {
unsigned long flags;
u16 type;
u32 timeout;
+ u32 retire;
struct rcu_head rcu_head;
};
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e1290c159184..7567d5fa8220 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -110,5 +110,8 @@ struct netns_ct {
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ unsigned int sysctl_flowtable_retire;
+#endif
};
#endif
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4ffe84c5a82c..92ed07b93846 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -620,6 +620,9 @@ enum nf_ct_sysctl_index {
#ifdef CONFIG_LWTUNNEL
NF_SYSCTL_CT_LWTUNNEL,
#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_FLOWTABLE_RETIRE,
+#endif
__NF_SYSCTL_CT_LAST_SYSCTL,
};
@@ -967,6 +970,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
+#endif
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_FLOWTABLE_RETIRE] = {
+ .procname = "nf_flowtable_retire",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .data = &init_net.ct.sysctl_flowtable_retire,
+ .proc_handler = proc_dointvec_jiffies,
+ },
#endif
{}
};
@@ -1111,6 +1123,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
nf_conntrack_standalone_init_dccp_sysctl(net, table);
nf_conntrack_standalone_init_gre_sysctl(net, table);
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ /* Disable retire by default */
+ net->ct.sysctl_flowtable_retire = 0;
+#endif
+
/* Don't allow non-init_net ns to alter global sysctls */
if (!net_eq(&init_net, net)) {
table[NF_SYSCTL_CT_MAX].mode = 0444;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 81c26a96c30b..0a449dec8565 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -285,6 +285,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
int err;
flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ if (nf_ct_net(flow->ct)->ct.sysctl_flowtable_retire) {
+ flow->retire = nf_flowtable_time_stamp +
+ nf_ct_net(flow->ct)->ct.sysctl_flowtable_retire;
+ } else {
+ flow->retire = 0;
+ }
err = rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[0].node,
@@ -313,6 +319,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
}
EXPORT_SYMBOL_GPL(flow_offload_add);
+static inline bool nf_flow_has_retired(const struct flow_offload *flow)
+{
+ return flow->retire && nf_flow_timeout_delta(flow->retire) <= 0;
+}
+
void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
@@ -327,7 +338,8 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
- nf_flow_offload_add(flow_table, flow);
+ if (!nf_flow_has_retired(flow))
+ nf_flow_offload_add(flow_table, flow);
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);
@@ -339,6 +351,7 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
nf_flow_offload_rhash_params);
@@ -423,12 +436,14 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
nf_ct_is_dying(flow->ct))
flow_offload_teardown(flow);
- if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags) || nf_flow_has_retired(flow)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
- if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) {
nf_flow_offload_del(flow_table, flow);
- else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
+ } else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) {
+ clear_bit(NF_FLOW_HW, &flow->flags);
flow_offload_del(flow_table, flow);
+ }
} else {
flow_offload_del(flow_table, flow);
}
--
2.37.2