[PATCH RFC v1 net-next 12/12] netfilter: nft_flow_offload: Add bridgeflow to nft_flow_offload_eval()

From: Eric Woudstra
Date: Sun Oct 13 2024 - 14:59:39 EST


Edit nft_flow_offload_eval() to make it possible to handle a flowtable of
the nft bridge family.

Use nft_flow_offload_bridge_init() to fill the flow tuples. It uses
nft_dev_fill_bridge_path() in each direction.

Signed-off-by: Eric Woudstra <ericwouds@xxxxxxxxx>
---
net/netfilter/nft_flow_offload.c | 142 +++++++++++++++++++++++++++++--
1 file changed, 137 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 2923286d475e..bd4850691baa 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -184,6 +184,129 @@ static bool nft_flowtable_find_dev(const struct net_device *dev,
return found;
}

+static int nft_dev_fill_bridge_path(struct flow_offload *flow,
+ struct nft_flowtable *ft,
+ const struct nft_pktinfo *pkt,
+ enum ip_conntrack_dir dir,
+ const struct net_device *src_dev,
+ const struct net_device *dst_dev,
+ unsigned char *src_ha,
+ unsigned char *dst_ha)
+{
+ struct flow_offload_tuple_rhash *th = flow->tuplehash;
+ struct net_device_path_stack stack;
+ struct net_device_path_ctx ctx = {};
+ struct nft_forward_info info = {};
+ int i, j = 0;
+
+ for (i = th[dir].tuple.encap_num - 1; i >= 0 ; i--) {
+ if (info.num_encaps >= NF_FLOW_TABLE_ENCAP_MAX)
+ return -1;
+ info.encap[info.num_encaps].id = th[dir].tuple.encap[i].id;
+ info.encap[info.num_encaps].proto = th[dir].tuple.encap[i].proto;
+ info.num_encaps++;
+
+ if (th[dir].tuple.encap[i].proto == htons(ETH_P_PPP_SES))
+ continue;
+
+ if (ctx.num_vlans >= NET_DEVICE_PATH_VLAN_MAX)
+ return -1;
+ ctx.vlan[ctx.num_vlans].id = th[dir].tuple.encap[i].id;
+ ctx.vlan[ctx.num_vlans].proto = th[dir].tuple.encap[i].proto;
+ ctx.num_vlans++;
+ }
+ ctx.dev = src_dev;
+ ether_addr_copy(ctx.daddr, dst_ha);
+
+ if (dev_fill_bridge_path(&ctx, &stack) < 0)
+ return -1;
+
+ nft_dev_path_info(&stack, &info, dst_ha, &ft->data);
+
+ if (!info.indev || info.indev != dst_dev)
+ return -1;
+
+ th[!dir].tuple.iifidx = info.indev->ifindex;
+ for (i = info.num_encaps - 1; i >= 0; i--) {
+ th[!dir].tuple.encap[j].id = info.encap[i].id;
+ th[!dir].tuple.encap[j].proto = info.encap[i].proto;
+ if (info.ingress_vlans & BIT(i))
+ th[!dir].tuple.in_vlan_ingress |= BIT(j);
+ j++;
+ }
+ th[!dir].tuple.encap_num = info.num_encaps;
+
+ th[dir].tuple.mtu = dst_dev->mtu;
+ ether_addr_copy(th[dir].tuple.out.h_source, src_ha);
+ ether_addr_copy(th[dir].tuple.out.h_dest, dst_ha);
+ th[dir].tuple.out.ifidx = info.outdev->ifindex;
+ th[dir].tuple.out.hw_ifidx = info.hw_outdev->ifindex;
+ th[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+
+ return 0;
+}
+
+static int nft_flow_offload_bridge_init(struct flow_offload *flow,
+ const struct nft_pktinfo *pkt,
+ enum ip_conntrack_dir dir,
+ struct nft_flowtable *ft)
+{
+ struct ethhdr *eth = eth_hdr(pkt->skb);
+ struct flow_offload_tuple *tuple;
+ const struct net_device *out_dev;
+ const struct net_device *in_dev;
+ int err, i = 0;
+
+ in_dev = nft_in(pkt);
+ if (!in_dev || !nft_flowtable_find_dev(in_dev, ft))
+ return -1;
+
+ out_dev = nft_out(pkt);
+ if (!out_dev || !nft_flowtable_find_dev(out_dev, ft))
+ return -1;
+
+ tuple = &flow->tuplehash[!dir].tuple;
+
+ if (skb_vlan_tag_present(pkt->skb)) {
+ tuple->encap[i].id = skb_vlan_tag_get(pkt->skb);
+ tuple->encap[i].proto = pkt->skb->vlan_proto;
+ i++;
+ }
+ switch (pkt->skb->protocol) {
+ case htons(ETH_P_8021Q):
+ struct vlan_hdr *vhdr;
+
+ vhdr = (struct vlan_hdr *)skb_network_header(pkt->skb);
+ tuple->encap[i].id = ntohs(vhdr->h_vlan_TCI);
+ tuple->encap[i].proto = pkt->skb->protocol;
+ i++;
+ break;
+ case htons(ETH_P_PPP_SES):
+ struct pppoe_hdr *phdr;
+
+ phdr = (struct pppoe_hdr *)skb_network_header(pkt->skb);
+ tuple->encap[i].id = ntohs(phdr->sid);
+ tuple->encap[i].proto = pkt->skb->protocol;
+ i++;
+ break;
+ }
+ tuple->encap_num = i;
+
+ err = nft_dev_fill_bridge_path(flow, ft, pkt, !dir, out_dev, in_dev,
+ eth->h_dest, eth->h_source);
+ if (err < 0)
+ return err;
+
+ memset(tuple->encap, 0, sizeof(tuple->encap));
+
+ err = nft_dev_fill_bridge_path(flow, ft, pkt, dir, in_dev, out_dev,
+ eth->h_source, eth->h_dest);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
static void nft_dev_forward_path(struct nf_flow_route *route,
const struct nf_conn *ct,
enum ip_conntrack_dir dir,
@@ -294,6 +417,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
struct nf_flowtable *flowtable = &priv->flowtable->data;
+ bool routing = (flowtable->type->family != NFPROTO_BRIDGE);
struct tcphdr _tcph, *tcph = NULL;
struct nf_flow_route route = {};
enum ip_conntrack_info ctinfo;
@@ -347,14 +471,20 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
goto out;

dir = CTINFO2DIR(ctinfo);
- if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
- goto err_flow_route;
+ if (routing) {
+ if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
+ goto err_flow_route;
+ }

flow = flow_offload_alloc(ct);
if (!flow)
goto err_flow_alloc;

- flow_offload_route_init(flow, &route);
+ if (routing)
+ flow_offload_route_init(flow, &route);
+ else
+ if (nft_flow_offload_bridge_init(flow, pkt, dir, priv->flowtable) < 0)
+ goto err_flow_route;

if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
@@ -407,8 +537,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
err_flow_add:
flow_offload_free(flow);
err_flow_alloc:
- dst_release(route.tuple[dir].dst);
- dst_release(route.tuple[!dir].dst);
+ if (routing) {
+ dst_release(route.tuple[dir].dst);
+ dst_release(route.tuple[!dir].dst);
+ }
err_flow_route:
clear_bit(IPS_OFFLOAD_BIT, &ct->status);
out:
--
2.45.2