[RFC] bridge-nf -- map IPv4 hooks onto bridge hooks, vs 2.5.42

From: Bart De Schuymer (bart.de.schuymer@pandora.be)
Date: Mon Oct 14 2002 - 13:05:06 EST


Hello David, list,

David, you asked for a patch for mapping the IPv4 hooks onto the bridge hooks,
this is the patch.

Comments and suggestions for better solutions are welcome.

I don't know how to (easily) move the copy of the Ethernet header out of
ip_output.c::ip_fragment(). The patch adds 3 members to struct sk_buff
(physindev, physoutdev and brnfmask)...

There is a little text file explaining the source code more in-depth here:
http://users.pandora.be/bart.de.schuymer/ebtables/br-nf/bridge-nf-0.0.10-dev-pre1-against-2.5.39-comments.txt
A high-level explanation of what we're doing is here:
http://users.pandora.be/bart.de.schuymer/ebtables/br_fw_ia/br_fw_ia.html
The patch is also available at:
http://users.pandora.be/bart.de.schuymer/ebtables/br-nf/bridge-nf-0.0.10-dev-pre1-against-2.5.42.diff

Note that Lennert has not yet responded to my private RFC to him, but
Halloween is coming, so we should at least try to get this into 2.5...

cheers,
Bart

Here's the patch:

--- linux-2.5.42/include/linux/netfilter.h Sat Oct 12 06:22:08 2002
+++ linux-2.5.42-brnf/include/linux/netfilter.h Sun Oct 13 11:45:19 2002
@@ -117,17 +117,23 @@
 /* This is gross, but inline doesn't cut it for avoiding the function
    call in fast path: gcc doesn't inline (needs value tracking?). --RR */
 #ifdef CONFIG_NETFILTER_DEBUG
-#define NF_HOOK nf_hook_slow
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+ nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN)
+#define NF_HOOK_THRESH nf_hook_slow
 #else
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
 (list_empty(&nf_hooks[(pf)][(hook)]) \
  ? (okfn)(skb) \
- : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn)))
+ : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN))
+#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
+(list_empty(&nf_hooks[(pf)][(hook)]) \
+ ? (okfn)(skb) \
+ : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), (thresh)))
 #endif
 
 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
                  struct net_device *indev, struct net_device *outdev,
- int (*okfn)(struct sk_buff *));
+ int (*okfn)(struct sk_buff *), int thresh);
 
 /* Call setsockopt() */
 int nf_setsockopt(struct sock *sk, int pf, int optval, char *opt,
--- linux-2.5.42/include/linux/netfilter_ipv4.h Sat Oct 12 06:22:18 2002
+++ linux-2.5.42-brnf/include/linux/netfilter_ipv4.h Sun Oct 13 11:45:19 2002
@@ -52,8 +52,10 @@
 enum nf_ip_hook_priorities {
         NF_IP_PRI_FIRST = INT_MIN,
         NF_IP_PRI_CONNTRACK = -200,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
         NF_IP_PRI_MANGLE = -150,
         NF_IP_PRI_NAT_DST = -100,
+ NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
         NF_IP_PRI_FILTER = 0,
         NF_IP_PRI_NAT_SRC = 100,
         NF_IP_PRI_LAST = INT_MAX,
--- linux-2.5.42/include/linux/netfilter_bridge.h Sat Oct 12 06:22:09 2002
+++ linux-2.5.42-brnf/include/linux/netfilter_bridge.h Sun Oct 13 11:45:19 2002
@@ -22,14 +22,27 @@
 #define NF_BR_BROUTING 5
 #define NF_BR_NUMHOOKS 6
 
+/* Masks for skb->brnfmask */
+#define BRNF_PKT_TYPE 0x01
+#define BRNF_BRIDGED_DNAT 0x02
+#define BRNF_COPY_HEADER 0x04
+#define BRNF_DONT_TAKE_PARENT 0x08
+
 enum nf_br_hook_priorities {
         NF_BR_PRI_FIRST = INT_MIN,
- NF_BR_PRI_FILTER_BRIDGED = -200,
- NF_BR_PRI_FILTER_OTHER = 200,
         NF_BR_PRI_NAT_DST_BRIDGED = -300,
+ NF_BR_PRI_FILTER_BRIDGED = -200,
+ NF_BR_PRI_BRNF = 0,
         NF_BR_PRI_NAT_DST_OTHER = 100,
+ NF_BR_PRI_FILTER_OTHER = 200,
         NF_BR_PRI_NAT_SRC = 300,
         NF_BR_PRI_LAST = INT_MAX,
 };
 
+/* Used in br_netfilter.c */
+struct bridge_skb_cb {
+ union {
+ __u32 ipv4;
+ } daddr;
+};
 #endif
--- linux-2.5.42/include/linux/skbuff.h Sat Oct 12 06:22:09 2002
+++ linux-2.5.42-brnf/include/linux/skbuff.h Sun Oct 13 11:45:19 2002
@@ -140,6 +140,8 @@
  * @sk: Socket we are owned by
  * @stamp: Time we arrived
  * @dev: Device we arrived on/are leaving by
+ * @physindev: Physical device we arrived on - see br_netfilter.c
+ * @physoutdev: Physical device we will leave by - see br_netfilter.c
  * @h: Transport layer header
  * @nh: Network layer header
  * @mac: Link layer header
@@ -166,6 +168,7 @@
  * @nfcache: Cache info
  * @nfct: Associated connection, if any
  * @nf_debug: Netfilter debugging
+ * @brnfmask: Info about a bridged frame - see br_netfilter.c
  * @tc_index: Traffic control index
  */
 
@@ -178,6 +181,8 @@
         struct sock *sk;
         struct timeval stamp;
         struct net_device *dev;
+ struct net_device *physindev;
+ struct net_device *physoutdev;
 
         union {
                 struct tcphdr *th;
@@ -236,6 +241,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
         unsigned int nf_debug;
 #endif
+ unsigned int brnfmask;
 #endif /* CONFIG_NETFILTER */
 #if defined(CONFIG_HIPPI)
         union {
--- linux-2.5.42/net/bridge/br.c Sat Oct 12 06:21:34 2002
+++ linux-2.5.42-brnf/net/bridge/br.c Sun Oct 13 11:45:19 2002
@@ -45,6 +45,8 @@
 {
         printk(KERN_INFO "NET4: Ethernet Bridge 008 for NET4.0\n");
 
+ if (br_netfilter_init())
+ return 1;
         br_handle_frame_hook = br_handle_frame;
         br_ioctl_hook = br_ioctl_deviceless_stub;
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
@@ -63,6 +65,7 @@
 
 static void __exit br_deinit(void)
 {
+ br_netfilter_fini();
         unregister_netdevice_notifier(&br_device_notifier);
         br_call_ioctl_atomic(__br_clear_ioctl_hook);
 
--- linux-2.5.42/net/bridge/br_forward.c Sat Oct 12 06:21:37 2002
+++ linux-2.5.42-brnf/net/bridge/br_forward.c Sun Oct 13 11:45:19 2002
@@ -30,7 +30,7 @@
         return 1;
 }
 
-static int __dev_queue_push_xmit(struct sk_buff *skb)
+int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
         skb_push(skb, ETH_HLEN);
         dev_queue_xmit(skb);
@@ -38,10 +38,10 @@
         return 0;
 }
 
-static int __br_forward_finish(struct sk_buff *skb)
+int br_forward_finish(struct sk_buff *skb)
 {
         NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
- __dev_queue_push_xmit);
+ br_dev_queue_push_xmit);
 
         return 0;
 }
@@ -53,7 +53,7 @@
         skb->nf_debug = 0;
 #endif
         NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
- __br_forward_finish);
+ br_forward_finish);
 }
 
 static void __br_forward(struct net_bridge_port *to, struct sk_buff *skb)
@@ -64,7 +64,7 @@
         skb->dev = to->dev;
 
         NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
- __br_forward_finish);
+ br_forward_finish);
 }
 
 /* called under bridge lock */
--- linux-2.5.42/net/bridge/br_input.c Sat Oct 12 06:21:35 2002
+++ linux-2.5.42-brnf/net/bridge/br_input.c Sun Oct 13 11:45:19 2002
@@ -49,7 +49,7 @@
                         br_pass_frame_up_finish);
 }
 
-static int br_handle_frame_finish(struct sk_buff *skb)
+int br_handle_frame_finish(struct sk_buff *skb)
 {
         struct net_bridge *br;
         unsigned char *dest;
--- linux-2.5.42/net/bridge/br_private.h Sat Oct 12 06:21:35 2002
+++ linux-2.5.42-brnf/net/bridge/br_private.h Sun Oct 13 11:45:19 2002
@@ -144,8 +144,10 @@
 /* br_forward.c */
 extern void br_deliver(struct net_bridge_port *to,
                 struct sk_buff *skb);
+extern int br_dev_queue_push_xmit(struct sk_buff *skb);
 extern void br_forward(struct net_bridge_port *to,
                 struct sk_buff *skb);
+extern int br_forward_finish(struct sk_buff *skb);
 extern void br_flood_deliver(struct net_bridge *br,
                       struct sk_buff *skb,
                       int clone);
@@ -166,6 +168,7 @@
                            int *ifindices);
 
 /* br_input.c */
+extern int br_handle_frame_finish(struct sk_buff *skb);
 extern int br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
@@ -176,6 +179,10 @@
              unsigned long arg1,
              unsigned long arg2);
 extern int br_ioctl_deviceless_stub(unsigned long arg);
+
+/* br_netfilter.c */
+extern int br_netfilter_init(void);
+extern void br_netfilter_fini(void);
 
 /* br_stp.c */
 extern int br_is_root_bridge(struct net_bridge *br);
--- linux-2.5.42/net/bridge/Makefile Sat Oct 12 06:22:45 2002
+++ linux-2.5.42-brnf/net/bridge/Makefile Sun Oct 13 11:45:19 2002
@@ -9,6 +9,11 @@
 bridge-objs := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
                         br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
                         br_stp_if.o br_stp_timer.o
+
+ifeq ($(CONFIG_NETFILTER),y)
+bridge-objs += br_netfilter.o
+endif
+
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
 
 include $(TOPDIR)/Rules.make
--- linux-2.5.42/net/core/netfilter.c Sat Oct 12 06:22:07 2002
+++ linux-2.5.42-brnf/net/core/netfilter.c Sun Oct 13 11:45:19 2002
@@ -342,10 +342,15 @@
                                const struct net_device *indev,
                                const struct net_device *outdev,
                                struct list_head **i,
- int (*okfn)(struct sk_buff *))
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh)
 {
         for (*i = (*i)->next; *i != head; *i = (*i)->next) {
                 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+
+ if (hook_thresh > elem->priority)
+ continue;
+
                 switch (elem->hook(hook, skb, indev, outdev, okfn)) {
                 case NF_QUEUE:
                         return NF_QUEUE;
@@ -413,6 +418,8 @@
 {
         int status;
         struct nf_info *info;
+ struct net_device *physindev;
+ struct net_device *physoutdev;
 
         if (!queue_handler[pf].outfn) {
                 kfree_skb(skb);
@@ -435,11 +442,16 @@
         if (indev) dev_hold(indev);
         if (outdev) dev_hold(outdev);
 
+ if ((physindev = skb->physindev)) dev_hold(physindev);
+ if ((physoutdev = skb->physoutdev)) dev_hold(physoutdev);
+
         status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
         if (status < 0) {
                 /* James M doesn't say fuck enough. */
                 if (indev) dev_put(indev);
                 if (outdev) dev_put(outdev);
+ if (physindev) dev_put(physindev);
+ if (physoutdev) dev_put(physoutdev);
                 kfree(info);
                 kfree_skb(skb);
                 return;
@@ -449,7 +461,8 @@
 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
                  struct net_device *indev,
                  struct net_device *outdev,
- int (*okfn)(struct sk_buff *))
+ int (*okfn)(struct sk_buff *),
+ int hook_thresh)
 {
         struct list_head *elem;
         unsigned int verdict;
@@ -481,7 +494,7 @@
 
         elem = &nf_hooks[pf][hook];
         verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
- outdev, &elem, okfn);
+ outdev, &elem, okfn, hook_thresh);
         if (verdict == NF_QUEUE) {
                 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
                 nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
@@ -530,7 +543,7 @@
                 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                      &skb, info->hook,
                                      info->indev, info->outdev, &elem,
- info->okfn);
+ info->okfn, INT_MIN);
         }
 
         switch (verdict) {
--- linux-2.5.42/net/core/skbuff.c Sat Oct 12 06:21:34 2002
+++ linux-2.5.42-brnf/net/core/skbuff.c Sun Oct 13 11:45:19 2002
@@ -234,6 +234,8 @@
         skb->sk = NULL;
         skb->stamp.tv_sec = 0; /* No idea about time */
         skb->dev = NULL;
+ skb->physindev = NULL;
+ skb->physoutdev = NULL;
         skb->dst = NULL;
         memset(skb->cb, 0, sizeof(skb->cb));
         skb->pkt_type = PACKET_HOST; /* Default type */
@@ -248,6 +250,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
         skb->nf_debug = 0;
 #endif
+ skb->brnfmask = 0;
 #endif
 #ifdef CONFIG_NET_SCHED
         skb->tc_index = 0;
@@ -363,6 +366,8 @@
         n->sk = NULL;
         C(stamp);
         C(dev);
+ C(physindev);
+ C(physoutdev);
         C(h);
         C(nh);
         C(mac);
@@ -392,6 +397,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
         C(nf_debug);
 #endif
+ C(brnfmask);
 #endif /*CONFIG_NETFILTER*/
 #if defined(CONFIG_HIPPI)
         C(private);
@@ -418,6 +424,8 @@
         new->list = NULL;
         new->sk = NULL;
         new->dev = old->dev;
+ new->physindev = old->physindev;
+ new->physoutdev = old->physoutdev;
         new->priority = old->priority;
         new->protocol = old->protocol;
         new->dst = dst_clone(old->dst);
@@ -438,6 +446,7 @@
 #ifdef CONFIG_NETFILTER_DEBUG
         new->nf_debug = old->nf_debug;
 #endif
+ new->brnfmask = old->brnfmask;
 #endif
 #ifdef CONFIG_NET_SCHED
         new->tc_index = old->tc_index;
--- linux-2.5.42/net/ipv4/ip_output.c Sat Oct 12 06:22:45 2002
+++ linux-2.5.42-brnf/net/ipv4/ip_output.c Sun Oct 13 11:45:19 2002
@@ -75,6 +75,7 @@
 #include <net/inetpeer.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/mroute.h>
 #include <linux/netlink.h>
 
@@ -908,6 +909,18 @@
                 iph->tot_len = htons(len + hlen);
 
                 ip_send_check(iph);
+
+ /*
+ * Fragments with a bridge device destination need
+ * to get the Ethernet header copied here, as
+ * br_dev_queue_push_xmit() can't do this.
+ * See net/bridge/br_netfilter.c
+ */
+
+#ifdef CONFIG_NETFILTER
+ if (skb->brnfmask & BRNF_COPY_HEADER)
+ memcpy(skb2->data - 16, skb->data - 16, 16);
+#endif
 
                 err = output(skb2);
                 if (err)
--- linux-2.5.42/net/ipv4/netfilter/ip_tables.c Sat Oct 12 06:21:35 2002
+++ linux-2.5.42-brnf/net/ipv4/netfilter/ip_tables.c Sun Oct 13 11:45:19 2002
@@ -121,12 +121,14 @@
 static inline int
 ip_packet_match(const struct iphdr *ip,
                 const char *indev,
+ const char *physindev,
                 const char *outdev,
+ const char *physoutdev,
                 const struct ipt_ip *ipinfo,
                 int isfrag)
 {
         size_t i;
- unsigned long ret;
+ unsigned long ret, ret2;
 
 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
 
@@ -156,7 +158,13 @@
                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
         }
 
- if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
+ for (i = 0, ret2 = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret2 |= (((const unsigned long *)physindev)[i]
+ ^ ((const unsigned long *)ipinfo->iniface)[i])
+ & ((const unsigned long *)ipinfo->iniface_mask)[i];
+ }
+
+ if (FWINV(ret != 0 && ret2 != 0, IPT_INV_VIA_IN)) {
                 dprintf("VIA in mismatch (%s vs %s).%s\n",
                         indev, ipinfo->iniface,
                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
@@ -169,7 +177,13 @@
                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
         }
 
- if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
+ for (i = 0, ret2 = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+ ret2 |= (((const unsigned long *)physoutdev)[i]
+ ^ ((const unsigned long *)ipinfo->outiface)[i])
+ & ((const unsigned long *)ipinfo->outiface_mask)[i];
+ }
+
+ if (FWINV(ret != 0 && ret2 != 0, IPT_INV_VIA_OUT)) {
                 dprintf("VIA out mismatch (%s vs %s).%s\n",
                         outdev, ipinfo->outiface,
                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
@@ -268,6 +282,7 @@
         /* Initializing verdict to NF_DROP keeps gcc happy. */
         unsigned int verdict = NF_DROP;
         const char *indev, *outdev;
+ const char *physindev, *physoutdev;
         void *table_base;
         struct ipt_entry *e, *back;
 
@@ -277,6 +292,9 @@
         datalen = (*pskb)->len - ip->ihl * 4;
         indev = in ? in->name : nulldevname;
         outdev = out ? out->name : nulldevname;
+ physindev = (*pskb)->physindev ? (*pskb)->physindev->name : nulldevname;
+ physoutdev = (*pskb)->physoutdev ? (*pskb)->physoutdev->name : nulldevname;
+
         /* We handle fragments by dealing with the first fragment as
          * if it was a normal packet. All other fragments are treated
          * normally, except that they will NEVER match rules that ask
@@ -311,7 +329,8 @@
                 IP_NF_ASSERT(e);
                 IP_NF_ASSERT(back);
                 (*pskb)->nfcache |= e->nfcache;
- if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
+ if (ip_packet_match(ip, indev, physindev, outdev, physoutdev,
+ &e->ip, offset)) {
                         struct ipt_entry_target *t;
 
                         if (IPT_MATCH_ITERATE(e, do_match,
--- linux-2.5.42/net/ipv4/netfilter/ipt_LOG.c Sat Oct 12 06:21:38 2002
+++ linux-2.5.42-brnf/net/ipv4/netfilter/ipt_LOG.c Sun Oct 13 11:45:19 2002
@@ -285,10 +285,13 @@
         level_string[1] = '0' + (loginfo->level % 8);
         spin_lock_bh(&log_lock);
         printk(level_string);
- printk("%sIN=%s OUT=%s ",
- loginfo->prefix,
- in ? in->name : "",
- out ? out->name : "");
+ printk("%sIN=%s ", loginfo->prefix, in ? in->name : "");
+ if ((*pskb)->physindev && in != (*pskb)->physindev)
+ printk("PHYSIN=%s ", (*pskb)->physindev->name);
+ printk("OUT=%s ", out ? out->name : "");
+ if ((*pskb)->physoutdev && out != (*pskb)->physoutdev)
+ printk("PHYSOUT=%s ", (*pskb)->physoutdev->name);
+
         if (in && !out) {
                 /* MAC logging for input chain only. */
                 printk("MAC=");
--- /dev/null Thu Aug 24 11:00:32 2000
+++ linux-2.5.42-brnf/net/bridge/br_netfilter.c Sun Oct 13 11:45:19 2002
@@ -0,0 +1,602 @@
+/*
+ * Handle firewalling
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ * Bart De Schuymer <bart.de.schuymer@pandora.be>
+ *
+ * $Id: br_netfilter.c,v 1.2 2002/09/11 19:35:44 bdschuym Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/in_route.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include "br_private.h"
+
+
+#define skb_origaddr(skb) (((struct bridge_skb_cb *) \
+ ((skb)->cb))->daddr.ipv4)
+#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) /* save current daddr in skb->cb */
+#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) /* true if daddr changed since the save */
+#define clear_cb(skb) (memset(&skb_origaddr(skb), 0, \
+ sizeof(struct bridge_skb_cb)))
+
+#define has_bridge_parent(device) ((device)->br_port != NULL)
+#define bridge_parent(device) (&((device)->br_port->br->dev))
+
+/* We need these fake structures to make netfilter happy --
+ * lots of places assume that skb->dst != NULL, which isn't
+ * all that unreasonable.
+ *
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static struct net_device __fake_net_device = {
+ hard_header_len: ETH_HLEN
+};
+
+static struct rtable __fake_rtable = {
+ u: {
+ dst: {
+ __refcnt: ATOMIC_INIT(1),
+ dev: &__fake_net_device,
+ pmtu: 1500
+ }
+ },
+
+ rt_flags: 0
+};
+
+
+/* PF_BRIDGE/PRE_ROUTING *********************************************/
+static void __br_dnat_complain(void)
+{
+ static unsigned long last_complaint = 0;
+
+ if (jiffies - last_complaint >= 5 * HZ) {
+ printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
+ "forwarding to be enabled\n");
+ last_complaint = jiffies;
+ }
+}
+
+
+/* This requires some explaining. If DNAT has taken place,
+ * we will need to fix up the destination Ethernet address,
+ * and this is a tricky process.
+ *
+ * There are two cases to consider:
+ * 1. The packet was DNAT'ed to a device in the same bridge
+ * port group as it was received on. We can still bridge
+ * the packet.
+ * 2. The packet was DNAT'ed to a different device, either
+ * a non-bridged device or another bridge port group.
+ * The packet will need to be routed.
+ *
+ * The correct way of distinguishing between these two cases is to
+ * call ip_route_input() and to look at skb->dst->dev, which is
+ * changed to the destination device if ip_route_input() succeeds.
+ *
+ * Let us first consider the case that ip_route_input() succeeds:
+ *
+ * If skb->dst->dev equals the logical bridge device the packet
+ * came in on, we can consider this bridging. We then call
+ * skb->dst->output() which will make the packet enter br_nf_local_out()
+ * not much later. In that function it is assured that the iptables
+ * FORWARD chain is traversed for the packet.
+ *
+ * Otherwise, the packet is considered to be routed and we just
+ * change the destination MAC address so that the packet will
+ * later be passed up to the IP stack to be routed.
+ *
+ * Let us now consider the case that ip_route_input() fails:
+ *
+ * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
+ * will fail, while ip_route_output() will return success. The source
+ * address for ip_route_output() is set to zero, so ip_route_output()
+ * thinks we're handling a locally generated packet and won't care
+ * if IP forwarding is allowed. We send a warning message to the user's
+ * log telling her to put IP forwarding on.
+ *
+ * ip_route_input() will also fail if there is no route available.
+ * In that case we just drop the packet.
+ *
+ * --Lennert, 20020411
+ * --Bart, 20020416 (updated)
+ * --Bart, 20021007 (updated)
+ */
+
+static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug |= (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_FORWARD);
+#endif
+
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ skb->pkt_type = PACKET_HOST;
+ skb->brnfmask |= BRNF_PKT_TYPE;
+ }
+
+ skb->dev = bridge_parent(skb->dev);
+ skb->dst->output(skb);
+ return 0;
+}
+
+static int br_nf_pre_routing_finish(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct iphdr *iph = skb->nh.iph;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
+#endif
+
+ if (skb->brnfmask & BRNF_PKT_TYPE) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->brnfmask ^= BRNF_PKT_TYPE;
+ }
+
+ if (dnat_took_place(skb)) {
+ if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+ dev)) {
+ struct rtable *rt;
+
+ if (!ip_route_output(&rt, iph->daddr, 0, iph->tos, 0)) {
+ /* Bridged-and-DNAT'ed traffic doesn't
+ * require ip_forwarding.
+ */
+ if (((struct dst_entry *)rt)->dev == dev) {
+ skb->dst = (struct dst_entry *)rt;
+ goto bridged_dnat;
+ }
+ __br_dnat_complain();
+ dst_release((struct dst_entry *)rt);
+ }
+ kfree_skb(skb);
+ return 0;
+ } else {
+ if (skb->dst->dev == dev) {
+bridged_dnat:
+ /* Tell br_nf_local_out this is a
+ * bridged frame
+ */
+ skb->brnfmask |= BRNF_BRIDGED_DNAT;
+ skb->dev = skb->physindev;
+ clear_cb(skb);
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
+ skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge,
+ 1);
+ return 0;
+ }
+ memcpy(skb->mac.ethernet->h_dest, dev->dev_addr,
+ ETH_ALEN);
+ }
+ } else {
+ skb->dst = (struct dst_entry *)&__fake_rtable;
+ dst_hold(skb->dst);
+ }
+
+ clear_cb(skb);
+ skb->dev = skb->physindev;
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ br_handle_frame_finish, 1);
+
+ return 0;
+}
+
+/* Replicate the checks that IPv4 does on packet reception.
+ * Set skb->dev to the bridge device (i.e. parent of the
+ * receiving device) to make netfilter happy, the REDIRECT
+ * target in particular. Save the original destination IP
+ * address to be able to detect DNAT afterwards.
+ */
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct iphdr *iph;
+ __u32 len;
+ struct sk_buff *skb;
+
+ if ((*pskb)->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto inhdr_error;
+
+ iph = skb->nh.iph;
+ if (iph->ihl < 5 || iph->version != 4)
+ goto inhdr_error;
+
+ if (!pskb_may_pull(skb, 4*iph->ihl))
+ goto inhdr_error;
+
+ iph = skb->nh.iph; /* re-read the header pointer after pskb_may_pull() */
+ if (ip_fast_csum((__u8 *)iph, iph->ihl) != 0)
+ goto inhdr_error;
+
+ len = ntohs(iph->tot_len);
+ if (skb->len < len || len < 4*iph->ihl)
+ goto inhdr_error;
+
+ if (skb->len > len) {
+ __pskb_trim(skb, len); /* cut the skb down to the IP total length */
+ if (skb->ip_summed == CHECKSUM_HW)
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug ^= (1 << NF_IP_PRE_ROUTING);
+#endif
+
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ skb->pkt_type = PACKET_HOST;
+ skb->brnfmask |= BRNF_PKT_TYPE; /* remembered so br_nf_pre_routing_finish() can restore it */
+ }
+
+ skb->physindev = skb->dev;
+ skb->dev = bridge_parent(skb->dev);
+ store_orig_dstaddr(skb);
+
+ NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish);
+
+ return NF_STOLEN;
+
+inhdr_error:
+// IP_INC_STATS_BH(IpInHdrErrors);
+out:
+ return NF_DROP;
+}
+
+
+/* PF_BRIDGE/LOCAL_IN ************************************************/
+/* The packet is locally destined, which requires a real
+ * dst_entry, so detach the fake one. On the way up, the
+ * packet would pass through PRE_ROUTING again (which already
+ * took place when the packet entered the bridge), but we
+ * register an IPv4 PRE_ROUTING 'sabotage' hook that will
+ * prevent this from happening.
+ */
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (skb->dst == (struct dst_entry *)&__fake_rtable) {
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ }
+
+ return NF_ACCEPT;
+}
+
+
+/* PF_BRIDGE/FORWARD *************************************************/
+static int br_nf_forward_finish(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug ^= (1 << NF_BR_FORWARD);
+#endif
+
+ if (skb->brnfmask & BRNF_PKT_TYPE) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->brnfmask ^= BRNF_PKT_TYPE;
+ }
+
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, skb->physindev,
+ skb->dev, br_forward_finish, 1);
+
+ return 0;
+}
+
+/* This is the 'purely bridged' case. We pass the packet to
+ * netfilter with indev and outdev set to the bridge device,
+ * but we are still able to filter on the 'real' indev/outdev
+ * because another bit of the bridge-nf patch overloads the
+ * '-i' and '-o' iptables interface checks to take
+ * skb->phys{in,out}dev into account as well (so both the real
+ * device and the bridge device will match).
+ */
+static unsigned int br_nf_forward(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug ^= (1 << NF_BR_FORWARD);
+#endif
+
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ skb->pkt_type = PACKET_HOST;
+ skb->brnfmask |= BRNF_PKT_TYPE;
+ }
+
+ skb->physoutdev = skb->dev;
+
+ NF_HOOK(PF_INET, NF_IP_FORWARD, skb, bridge_parent(skb->physindev),
+ bridge_parent(skb->dev), br_nf_forward_finish);
+
+ return NF_STOLEN;
+}
+
+
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug &= ~(1 << NF_BR_LOCAL_OUT);
+#endif
+
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ br_forward_finish, NF_BR_PRI_FIRST + 1);
+
+ return 0;
+}
+
+
+/* This function sees both locally originated IP packets and forwarded
+ * IP packets (in both cases the destination device is a bridge
+ * device). It also sees bridged-and-DNAT'ed packets.
+ * For the sake of interface transparency (i.e. properly
+ * overloading the '-o' option), we steal packets destined to
+ * a bridge device away from the PF_INET/FORWARD and PF_INET/OUTPUT hook
+ * functions, and give them back later, when we have determined the real
+ * output device. This is done in here.
+ *
+ * If (skb->brnfmask & BRNF_BRIDGED_DNAT) then the packet is bridged
+ * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
+ * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
+ * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
+ * will be executed.
+ * Otherwise, if skb->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated. We fake
+ * the PF_INET/LOCAL_OUT hook.
+ * Finally, if skb->physindev isn't NULL, then the packet was IP routed,
+ * so we fake the PF_INET/FORWARD hook. ipv4_sabotage_out() makes sure
+ * even routed packets that didn't arrive on a bridge interface have their
+ * skb->physindev set.
+ */
+
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*_okfn)(struct sk_buff *))
+{
+ int (*okfn)(struct sk_buff *skb);
+ struct net_device *realindev;
+ struct sk_buff *skb = *pskb;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ /* Sometimes we get packets with NULL ->dst here (for example,
+ * running a dhcp client daemon triggers this).
+ */
+ if (skb->dst == NULL)
+ return NF_ACCEPT;
+
+ skb->physoutdev = skb->dev;
+
+ realindev = skb->physindev;
+
+ /* Bridged, take PF_BRIDGE/FORWARD.
+ * (see big note in front of br_nf_pre_routing_finish)
+ */
+ if (skb->brnfmask & BRNF_BRIDGED_DNAT) {
+ okfn = br_forward_finish;
+
+ if (skb->brnfmask & BRNF_PKT_TYPE) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->brnfmask ^= BRNF_PKT_TYPE;
+ }
+
+ NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
+ skb->dev, okfn);
+ } else {
+ okfn = br_nf_local_out_finish;
+ /* IP forwarded traffic has a physindev, locally
+ * generated traffic hasn't.
+ */
+ if (realindev != NULL) {
+ if (((skb->brnfmask & BRNF_DONT_TAKE_PARENT) == 0) &&
+ has_bridge_parent(realindev))
+ realindev = bridge_parent(realindev);
+
+ NF_HOOK_THRESH(PF_INET, NF_IP_FORWARD, skb, realindev,
+ bridge_parent(skb->dev), okfn,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
+ } else {
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug ^= (1 << NF_IP_LOCAL_OUT);
+#endif
+
+ NF_HOOK_THRESH(PF_INET, NF_IP_LOCAL_OUT, skb, realindev,
+ bridge_parent(skb->dev), okfn,
+ NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+ }
+ }
+
+ return NF_STOLEN;
+}
+
+
+/* PF_BRIDGE/POST_ROUTING ********************************************/
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+ /* Be very paranoid. */
+ if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+ printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
+ "bad mac.raw pointer.");
+ if (skb->dev != NULL) {
+ printk("[%s]", skb->dev->name);
+ if (has_bridge_parent(skb->dev))
+ printk("[%s]", bridge_parent(skb->dev)->name);
+ }
+ printk("\n");
+ return NF_ACCEPT;
+ }
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ /* Sometimes we get packets with NULL ->dst here (for example,
+ * running a dhcp client daemon triggers this).
+ */
+ if (skb->dst == NULL)
+ return NF_ACCEPT;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug ^= (1 << NF_IP_POST_ROUTING);
+#endif
+
+ /* We assume any code from br_dev_queue_push_xmit onwards doesn't care
+ * about the value of skb->pkt_type.
+ */
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ skb->pkt_type = PACKET_HOST;
+ skb->brnfmask |= BRNF_PKT_TYPE;
+ }
+
+ /* Fragmented packets need a good Ethernet header, tell this to
+ * ip_output.c::ip_fragment().
+ */
+ skb->brnfmask |= BRNF_COPY_HEADER;
+
+ NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
+ bridge_parent(skb->dev), br_dev_queue_push_xmit);
+
+ return NF_STOLEN;
+}
+
+
+/* IPv4/SABOTAGE *****************************************************/
+
+/* Don't hand locally destined packets to PF_INET/PRE_ROUTING
+ * for the second time.
+ */
+static unsigned int ipv4_sabotage_in(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (in->hard_start_xmit == br_dev_xmit && /* input device transmits via br_dev_xmit */
+ okfn != br_nf_pre_routing_finish) { /* and this is not our own faked PRE_ROUTING */
+ okfn(*pskb); /* run okfn directly, skipping the remaining hooks */
+ return NF_STOLEN;
+ }
+
+ return NF_ACCEPT;
+}
+
+/* Postpone execution of PF_INET/FORWARD, PF_INET/LOCAL_OUT
+ * and PF_INET/POST_ROUTING until we have done the forwarding
+ * decision in the bridge code and have determined skb->physoutdev.
+ */
+static unsigned int ipv4_sabotage_out(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (out->hard_start_xmit == br_dev_xmit &&
+ okfn != br_nf_forward_finish &&
+ okfn != br_nf_local_out_finish &&
+ okfn != br_dev_queue_push_xmit) {
+ struct sk_buff *skb = *pskb;
+
+ /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
+ * will need the indev then. For a brouter, the real indev
+ * can be a bridge port, so we make sure br_nf_local_out()
+ * doesn't use the bridge parent of the indev by using
+ * the BRNF_DONT_TAKE_PARENT mask.
+ */
+ if (hook == NF_IP_FORWARD && skb->physindev == NULL) {
+ skb->brnfmask |= BRNF_DONT_TAKE_PARENT; /* must OR the flag in; "&=" could never set it */
+ skb->physindev = (struct net_device *)in;
+ }
+ okfn(skb);
+ return NF_STOLEN;
+ }
+
+ return NF_ACCEPT;
+}
+
+/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to ensure that innocent
+ * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
+ * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
+ * ip_refrag() can return NF_STOLEN.
+ */
+static struct nf_hook_ops br_nf_ops[] = {
+ { { NULL, NULL }, br_nf_pre_routing, PF_BRIDGE, NF_BR_PRE_ROUTING, NF_BR_PRI_BRNF },
+ { { NULL, NULL }, br_nf_local_in, PF_BRIDGE, NF_BR_LOCAL_IN, NF_BR_PRI_BRNF },
+ { { NULL, NULL }, br_nf_forward, PF_BRIDGE, NF_BR_FORWARD, NF_BR_PRI_BRNF },
+ { { NULL, NULL }, br_nf_local_out, PF_BRIDGE, NF_BR_LOCAL_OUT, NF_BR_PRI_FIRST },
+ { { NULL, NULL }, br_nf_post_routing, PF_BRIDGE, NF_BR_POST_ROUTING, NF_BR_PRI_LAST },
+ { { NULL, NULL }, ipv4_sabotage_in, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_FIRST },
+ { { NULL, NULL }, ipv4_sabotage_out, PF_INET, NF_IP_FORWARD, NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD },
+ { { NULL, NULL }, ipv4_sabotage_out, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT },
+ { { NULL, NULL }, ipv4_sabotage_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_FIRST }
+};
+
+#define NUMHOOKS (sizeof(br_nf_ops)/sizeof(br_nf_ops[0]))
+
+int br_netfilter_init(void)
+{
+ int i;
+
+ for (i = 0; i < NUMHOOKS; i++) {
+ int ret;
+
+ if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0)
+ continue;
+
+ while (i--)
+ nf_unregister_hook(&br_nf_ops[i]);
+
+ return ret;
+ }
+
+ printk(KERN_NOTICE "Bridge firewalling registered\n");
+
+ return 0;
+}
+
+void br_netfilter_fini(void)
+{
+ int i;
+
+ for (i = NUMHOOKS - 1; i >= 0; i--)
+ nf_unregister_hook(&br_nf_ops[i]);
+}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Oct 15 2002 - 22:00:51 EST