[net-next 1/2] Perform IPv4 FIB lookup in a predefined FIB table
From: Carmine Scarpitta
Date: Wed Feb 12 2020 - 20:24:58 EST
In IPv4, the routing subsystem is invoked by calling ip_route_input_rcu(),
which performs the multicast recognition logic and then calls
ip_route_input_slow(). ip_route_input_slow() initialises both the "fi" and
"table" members of the fib_result structure to NULL before calling fib_lookup().
fib_lookup() performs the FIB lookup in the routing table configured
by the policy routing rules.
In this patch, we allow invoking the IPv4 routing subsystem
with a known routing table. This is useful for use-cases implementing
a separate routing table per tenant.
The patch adds a new boolean flag named "tbl_known" to the signatures of
ip_route_input_rcu() and ip_route_input_slow().
When the flag is set, ip_route_input_slow() calls fib_table_lookup()
on the table supplied by the caller in res->table instead of calling
fib_lookup().
Signed-off-by: Carmine Scarpitta <carmine.scarpitta@xxxxxxxxxxx>
Acked-by: Ahmed Abdelsalam <ahmed.abdelsalam@xxxxxxx>
Acked-by: Andrea Mayer <andrea.mayer@xxxxxxxxxxx>
Acked-by: Paolo Lungaroni <paolo.lungaroni@xxxxxxx>
---
include/net/route.h | 2 +-
net/ipv4/route.c | 22 ++++++++++++++--------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index a9c60fc68e36..4ff977bd7029 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -183,7 +183,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin);
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin,
- struct fib_result *res);
+ struct fib_result *res, bool tbl_known);
int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d5c57b3f77d5..39cec9883d6f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2077,7 +2077,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
- struct fib_result *res)
+ struct fib_result *res, bool tbl_known)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
@@ -2109,8 +2109,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
- res->fi = NULL;
- res->table = NULL;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
@@ -2155,7 +2153,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.fl4_dport = 0;
}
- err = fib_lookup(net, &fl4, res, 0);
+ if (!tbl_known) {
+ res->fi = NULL;
+ res->table = NULL;
+ err = fib_lookup(net, &fl4, res, 0);
+ } else {
+ err = fib_table_lookup(res->table, &fl4, res, FIB_LOOKUP_NOREF);
+ }
+
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
err = -EHOSTUNREACH;
@@ -2292,7 +2297,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
tos &= IPTOS_RT_MASK;
rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res, false);
rcu_read_unlock();
return err;
@@ -2301,7 +2306,8 @@ EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock held */
int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, struct fib_result *res)
+ u8 tos, struct net_device *dev, struct fib_result *res,
+ bool tbl_known)
{
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
@@ -2347,7 +2353,7 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return err;
}
- return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
+ return ip_route_input_slow(skb, daddr, saddr, tos, dev, res, tbl_known);
}
/* called with rcu_read_lock() */
@@ -3192,7 +3198,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
- dev, &res);
+ dev, &res, false);
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
--
2.17.1