[PATCH 22/24] net, diet: Support simpler routing table

From: Andi Kleen
Date: Mon May 05 2014 - 18:27:26 EST


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Add an optional fib_list that uses a simple list to store routes.
This is suitable for single homed client system which typically
have only a handful of routes.

The new file is a complete 1:1 replacement for fib_trie and
implements the same API for other files.

The code still uses RCU, so should have similar multi-core scalability,
as long as the number of routes is small. It also has
all the functionality of the fib_trie, so can be used
with advanced routing (except for a few missing proc files)

The code started as a copy of fib_trie, with all the trie
code replaced with a simple list. There's some very similar
code with trie that could be factored out in a common file.

Worth about 8.5k of code. In addition it uses kmalloc
instead of explicit slabs, which gives better memory efficiency
for very small routing table (all data <4K, so sharing with
other kmalloc users is possible)

text data bss dec hex filename
943211 71810 25352 1040373 fdff5 net/built-in.o-fib-list
951716 72066 25352 1049134 10022e net/built-in.o-fib-trie

XXX experimential, needs more testing, review

Cc: robert.olsson@xxxxxxxxx
Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
net/ipv4/Kconfig | 18 +-
net/ipv4/Makefile | 4 +-
net/ipv4/fib_list.c | 985 ++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1005 insertions(+), 2 deletions(-)
create mode 100644 net/ipv4/fib_list.c

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index cdb4f57..578e332 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,9 +55,25 @@ config IP_PING
---help---
Enable ping sockets to enable suid-less ping.

+config FIB_TRIE
+ bool "Use trie based routing table"
+ default y
+ ---help---
+ This is an advanced routing table implementation that scales to a larger
+ number of routes. When in doubt say y.
+
+config FIB_LIST
+ bool
+ default y
+ depends on !FIB_TRIE
+ ---help---
+ This is a very simple linear routing table implementation that is only
+ suitable for clients with a very small number of routes (<10). The only
+ advantage over FIB_TRIE is smaller code size.
+
config IP_FIB_TRIE_STATS
bool "FIB TRIE statistics"
- depends on IP_ADVANCED_ROUTER
+ depends on IP_ADVANCED_ROUTER && FIB_TRIE
---help---
Keep track of statistics on structure of FIB TRIE table.
Useful for testing and measuring TRIE performance.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9353beb..8c2a08d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,9 +10,11 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_minisocks.o tcp_cong.o \
datagram.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o \
- fib_frontend.o fib_semantics.o fib_trie.o \
+ fib_frontend.o fib_semantics.o \
inet_fragment.o ip_tunnel_core.o gre_offload.o

+obj-$(CONFIG_FIB_TRIE) += fib_trie.o
+obj-$(CONFIG_FIB_LIST) += fib_list.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_IP_PING) += ping.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/fib_list.c b/net/ipv4/fib_list.c
new file mode 100644
index 0000000..969e54d
--- /dev/null
+++ b/net/ipv4/fib_list.c
@@ -0,0 +1,985 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version
+ * 2 of the License.
+ *
+ * This is based on the fib_trie code (which was based on fib_hash),
+ * but all the trie code removed and replaced with a simple
+ * list. In addition it uses kmalloc instead of slab directly
+ * to save some dynamic memory.
+ *
+ * This is for systems with very small routing tables, like
+ * your typical single-homed client. You shouldn't be using this for
+ * more than a handful of routes. The main motivation is smaller
+ * code size. Apart from being less scalable and some missing
+ * proc output the functionality should be the same as fib_trie.
+ *
+ * Some code is very similar with fib_trie and could be later
+ * factored out into a shared file.
+ *
+ * List code by Andi Kleen, original trie file was:
+ *
+ * Robert Olsson <robert.olsson@xxxxxxxxx> Uppsala Universitet
+ * & Swedish University of Agricultural Sciences.
+ *
+ * Jens Laas <jens.laas@xxxxxxxxxxx> Swedish University of
+ * Agricultural Sciences.
+ *
+ * Hans Liss <hans.liss@xxxxxxxxx> Uppsala Universitet
+ *
+ * Code from fib_hash has been reused which includes the following header:
+ *
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IPv4 FIB: lookup engine and maintenance routines.
+ *
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@xxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Substantial contributions to this work comes from:
+ *
+ * David S. Miller, <davem@xxxxxxxxxxxxx>
+ * Stephen Hemminger <shemminger@xxxxxxxx>
+ * Paul E. McKenney <paulmck@xxxxxxxxxx>
+ * Patrick McHardy <kaber@xxxxxxxxx>
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+#include "fib_lookup.h"
+
+/* One entry with the same key/mask */
+struct entry {
+ struct hlist_node nd;
+ u32 key;
+ u32 mask;
+ int plen;
+ struct hlist_head list;
+ struct rcu_head rcu;
+};
+
+struct entry_info {
+ struct hlist_node hlist;
+ int plen;
+ u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
+ struct list_head falh;
+ struct rcu_head rcu;
+};
+
+/* The list is ordered by the prefix length. Larger prefixes come
+ * earlier (so the default routes should be always at the end)
+ * Protected by the RTNL for writing, reading is handled with standard
+ * list RCU.
+ */
+struct rlist {
+ struct hlist_head list;
+};
+
+static void __alias_free_mem(struct rcu_head *head)
+{
+ struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
+ kfree(fa);
+}
+
+static inline void alias_free_mem_rcu(struct fib_alias *fa)
+{
+ call_rcu(&fa->rcu, __alias_free_mem);
+}
+
+static void __entry_free_rcu(struct rcu_head *head)
+{
+ struct entry *l = container_of(head, struct entry, rcu);
+ kfree(l);
+}
+
+static inline void free_entry(struct entry *entry)
+{
+ call_rcu(&entry->rcu, __entry_free_rcu);
+}
+
+static inline void free_entry_info(struct entry_info *entry)
+{
+ kfree_rcu(entry, rcu);
+}
+
+static struct entry *entry_new(void)
+{
+ return kzalloc(sizeof(struct entry), GFP_KERNEL);
+}
+
+static struct entry_info *entry_info_new(int plen)
+{
+ struct entry_info *ei = kzalloc(sizeof(struct entry_info), GFP_KERNEL);
+ if (ei) {
+ ei->plen = plen;
+ ei->mask_plen = ntohl(inet_make_mask(plen));
+ INIT_LIST_HEAD(&ei->falh);
+ }
+ return ei;
+}
+
+/* readside must use rcu_read_lock currently dump routines
+ via get_fa_head and dump */
+
+static struct entry_info *find_entry_info(struct entry *e, int plen)
+{
+ struct hlist_head *head = &e->list;
+ struct entry_info *ei;
+
+ hlist_for_each_entry_rcu(ei, head, hlist)
+ if (ei->plen == plen)
+ return ei;
+
+ return NULL;
+}
+
+static inline struct list_head *get_fa_head(struct entry *l, int plen)
+{
+ struct entry_info *li = find_entry_info(l, plen);
+
+ if (!li)
+ return NULL;
+
+ return &li->falh;
+}
+
+static void insert_entry_info(struct hlist_head *head, struct entry_info *new)
+{
+ struct entry_info *li = NULL, *last = NULL;
+
+ if (hlist_empty(head)) {
+ hlist_add_head_rcu(&new->hlist, head);
+ } else {
+ hlist_for_each_entry(li, head, hlist) {
+ if (new->plen > li->plen)
+ break;
+
+ last = li;
+ }
+ if (last)
+ hlist_add_after_rcu(&last->hlist, &new->hlist);
+ else
+ hlist_add_before_rcu(&new->hlist, &li->hlist);
+ }
+}
+
+/* rcu_read_lock needs to be hold by caller from readside */
+static struct entry *
+fib_find_node(struct rlist *rl, u32 key)
+{
+ struct entry *entry;
+
+ hlist_for_each_entry_rcu (entry, &rl->list, nd) {
+ if (key == entry->key)
+ return entry;
+ }
+ return NULL;
+}
+
+/* only used from updater-side */
+
+static struct list_head *fib_insert_node(struct rlist *rl, u32 key, int plen)
+{
+ struct entry *entry;
+ struct entry_info *ei;
+ struct hlist_node *prev = NULL;
+ struct list_head *fa_head;
+
+ hlist_for_each_entry (entry, &rl->list, nd) {
+ if (entry->plen < plen)
+ break;
+ prev = &entry->nd;
+ }
+ entry = entry_new();
+ if (!entry)
+ return NULL;
+ entry->key = key;
+ entry->mask = ntohl(inet_make_mask(plen));
+ entry->plen = plen;
+ if (prev)
+ hlist_add_after_rcu(prev, &entry->nd);
+ else
+ hlist_add_head_rcu(&entry->nd, &rl->list);
+ ei = entry_info_new(plen);
+ if (!ei)
+ return NULL;
+ fa_head = &ei->falh;
+ insert_entry_info(&entry->list, ei);
+ return fa_head;
+}
+
+int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
+{
+ struct rlist *rl = (struct rlist *) tb->tb_data;
+ struct fib_alias *fa, *new_fa;
+ struct list_head *fa_head = NULL;
+ struct fib_info *fi;
+ int plen = cfg->fc_dst_len;
+ u8 tos = cfg->fc_tos;
+ u32 key, mask;
+ int err;
+ struct entry *l;
+ bool found = false;
+
+ if (plen > 32)
+ return -EINVAL;
+
+ key = ntohl(cfg->fc_dst);
+
+ pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
+
+ mask = ntohl(inet_make_mask(plen));
+
+ if (key & ~mask)
+ return -EINVAL;
+
+ fi = fib_create_info(cfg);
+ if (IS_ERR(fi)) {
+ err = PTR_ERR(fi);
+ goto err;
+ }
+
+ hlist_for_each_entry (l, &rl->list, nd) {
+ if (l->key == key) {
+ found = true;
+ break;
+ }
+ }
+ fa = NULL;
+ if (!found)
+ l = NULL;
+
+ if (l) {
+ fa_head = get_fa_head(l, plen);
+ fa = fib_find_alias(fa_head, tos, fi->fib_priority);
+ }
+
+ /* Now fa, if non-NULL, points to the first fib alias
+ * with the same keys [prefix,tos,priority], if such key already
+ * exists or to the node before which we will insert new one.
+ *
+ * If fa is NULL, we will need to allocate a new one and
+ * insert to the head of f.
+ *
+ * If f is NULL, no fib node matched the destination key
+ * and we need to allocate a new one of those as well.
+ */
+
+ if (fa && fa->fa_tos == tos &&
+ fa->fa_info->fib_priority == fi->fib_priority) {
+ struct fib_alias *fa_first, *fa_match;
+ int iter = 0;
+
+ err = -EEXIST;
+ if (cfg->fc_nlflags & NLM_F_EXCL)
+ goto out;
+
+ /* We have 2 goals:
+ * 1. Find exact match for type, scope, fib_info to avoid
+ * duplicate routes
+ * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
+ */
+ fa_match = NULL;
+ fa_first = fa;
+ fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+ list_for_each_entry_continue(fa, fa_head, fa_list) {
+ if (fa->fa_tos != tos)
+ break;
+ if (fa->fa_info->fib_priority != fi->fib_priority)
+ break;
+ if (fa->fa_type == cfg->fc_type &&
+ fa->fa_info == fi) {
+ fa_match = fa;
+ break;
+ }
+ iter++;
+ }
+ if (cfg->fc_nlflags & NLM_F_REPLACE) {
+ struct fib_info *fi_drop;
+ u8 state;
+
+ fa = fa_first;
+ if (fa_match) {
+ if (fa == fa_match)
+ err = 0;
+ goto out;
+ }
+ err = -ENOBUFS;
+ new_fa = kmalloc(sizeof(struct fib_alias), GFP_KERNEL);
+ if (new_fa == NULL)
+ goto out;
+
+ fi_drop = fa->fa_info;
+ new_fa->fa_tos = fa->fa_tos;
+ new_fa->fa_info = fi;
+ new_fa->fa_type = cfg->fc_type;
+ state = fa->fa_state;
+ new_fa->fa_state = state & ~FA_S_ACCESSED;
+
+ list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+ alias_free_mem_rcu(fa);
+
+ fib_release_info(fi_drop);
+ if (state & FA_S_ACCESSED)
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
+ tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
+
+ goto succeeded;
+ }
+ /* Error if we find a perfect match which
+ * uses the same scope, type, and nexthop
+ * information.
+ */
+ if (fa_match) {
+ goto out;
+ }
+
+ if (!(cfg->fc_nlflags & NLM_F_APPEND))
+ fa = fa_first;
+ }
+ err = -ENOENT;
+ if (!(cfg->fc_nlflags & NLM_F_CREATE))
+ goto out;
+
+ err = -ENOBUFS;
+ new_fa = kmalloc(sizeof(struct fib_alias), GFP_KERNEL);
+ if (new_fa == NULL)
+ goto out;
+
+ new_fa->fa_info = fi;
+ new_fa->fa_tos = tos;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_state = 0;
+
+ /*
+ * Insert new entry to the list.
+ */
+
+ if (!fa_head) {
+ fa_head = fib_insert_node(rl, key, plen);
+ if (unlikely(!fa_head)) {
+ err = -ENOMEM;
+ goto out_free_new_fa;
+ }
+ }
+ if (!plen)
+ tb->tb_num_default++;
+
+ list_add_tail_rcu(&new_fa->fa_list,
+ (fa ? &fa->fa_list : fa_head));
+
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
+ rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+ &cfg->fc_nlinfo, 0);
+succeeded:
+ return 0;
+
+out_free_new_fa:
+ kfree(new_fa);
+out:
+ fib_release_info(fi);
+err:
+ pr_debug("insert err %d\n", err);
+ return err;
+}
+
+static int check_entry(struct fib_table *tb, struct entry *l,
+ u32 key, const struct flowi4 *flp,
+ struct fib_result *res, int fib_flags)
+{
+ struct entry_info *li;
+ struct hlist_head *hhead = &l->list;
+
+ hlist_for_each_entry_rcu(li, hhead, hlist) {
+ struct fib_alias *fa;
+
+ list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+ int nhsel, err;
+
+ if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ continue;
+ if (fi->fib_dead)
+ continue;
+ if (fa->fa_info->fib_scope < flp->flowi4_scope)
+ continue;
+ fib_alias_accessed(fa);
+ err = fib_props[fa->fa_type].error;
+ if (err) {
+ return err;
+ }
+ if (fi->fib_flags & RTNH_F_DEAD)
+ continue;
+ for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+ const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+ if (nh->nh_flags & RTNH_F_DEAD)
+ continue;
+ if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
+ continue;
+
+ res->prefixlen = li->plen;
+ res->nh_sel = nhsel;
+ res->type = fa->fa_type;
+ res->scope = fa->fa_info->fib_scope;
+ res->fi = fi;
+ res->table = tb;
+ res->fa_head = &li->falh;
+ if (!(fib_flags & FIB_LOOKUP_NOREF))
+ atomic_inc(&fi->fib_clntref);
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
+ struct fib_result *res, int fib_flags)
+{
+ struct entry *entry;
+ struct rlist *rl = (struct rlist *)tb->tb_data;
+ u32 key = ntohl(flp->daddr);
+ int ret = 1;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu (entry, &rl->list, nd) {
+ if ((key & entry->mask) == entry->key) {
+ ret = check_entry(tb, entry, key, flp, res, fib_flags);
+ if (ret <= 0)
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(fib_table_lookup);
+
+static void entry_remove(struct entry *entry)
+{
+ hlist_del_rcu(&entry->nd);
+ free_entry(entry);
+}
+
+/*
+ * Caller must hold RTNL.
+ */
+int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
+{
+ struct rlist *rl = (struct rlist *) tb->tb_data;
+ u32 key, mask;
+ int plen = cfg->fc_dst_len;
+ u8 tos = cfg->fc_tos;
+ struct fib_alias *fa, *fa_to_delete;
+ struct list_head *fa_head;
+ struct entry *l;
+ struct entry_info *li;
+
+ if (plen > 32)
+ return -EINVAL;
+
+ key = ntohl(cfg->fc_dst);
+ mask = ntohl(inet_make_mask(plen));
+
+ if (key & ~mask)
+ return -EINVAL;
+
+ l = fib_find_node(rl, key);
+
+ if (!l)
+ return -ESRCH;
+
+ li = find_entry_info(l, plen);
+
+ if (!li)
+ return -ESRCH;
+
+ fa_head = &li->falh;
+ fa = fib_find_alias(fa_head, tos, 0);
+
+ if (!fa)
+ return -ESRCH;
+
+ pr_debug("Deleting %08x/%d tos=%d\n", key, plen, tos);
+
+ fa_to_delete = NULL;
+ fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+ list_for_each_entry_continue(fa, fa_head, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (fa->fa_tos != tos)
+ break;
+
+ if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+ (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+ fa->fa_info->fib_scope == cfg->fc_scope) &&
+ (!cfg->fc_prefsrc ||
+ fi->fib_prefsrc == cfg->fc_prefsrc) &&
+ (!cfg->fc_protocol ||
+ fi->fib_protocol == cfg->fc_protocol) &&
+ fib_nh_match(cfg, fi) == 0) {
+ fa_to_delete = fa;
+ break;
+ }
+ }
+
+ if (!fa_to_delete)
+ return -ESRCH;
+
+ fa = fa_to_delete;
+ rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+ &cfg->fc_nlinfo, 0);
+
+ list_del_rcu(&fa->fa_list);
+
+ if (!plen)
+ tb->tb_num_default--;
+
+ if (list_empty(fa_head)) {
+ hlist_del_rcu(&li->hlist);
+ free_entry_info(li);
+ }
+
+ if (hlist_empty(&l->list))
+ entry_remove(l);
+
+ if (fa->fa_state & FA_S_ACCESSED)
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
+
+ fib_release_info(fa->fa_info);
+ alias_free_mem_rcu(fa);
+ return 0;
+}
+
+static int flush_list(struct list_head *head)
+{
+ struct fib_alias *fa, *fa_node;
+ int found = 0;
+
+ list_for_each_entry_safe(fa, fa_node, head, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+ list_del_rcu(&fa->fa_list);
+ fib_release_info(fa->fa_info);
+ alias_free_mem_rcu(fa);
+ found++;
+ }
+ }
+ return found;
+}
+
+static int flush_entry(struct entry *e)
+{
+ int found = 0;
+ struct hlist_head *lih = &e->list;
+ struct hlist_node *tmp;
+ struct entry_info *li = NULL;
+
+ hlist_for_each_entry_safe(li, tmp, lih, hlist) {
+ found += flush_list(&li->falh);
+
+ if (list_empty(&li->falh)) {
+ hlist_del_rcu(&li->hlist);
+ free_entry_info(li);
+ }
+ }
+ return found;
+}
+
+static struct entry *first_entry(struct rlist *rl)
+{
+ struct hlist_node *first;
+
+ first = rcu_dereference(hlist_first_rcu(&rl->list));
+ return first ? hlist_entry(first, struct entry, nd) : NULL;
+}
+
+static struct entry *next_entry(struct entry *e)
+{
+ struct hlist_node *next = hlist_next_rcu(&e->nd);
+
+ return next ? hlist_entry(next, struct entry, nd) : NULL;
+}
+
+static struct entry *entry_index(struct rlist *rl, int index)
+{
+ struct entry *e;
+
+ hlist_for_each_entry_rcu (e, &rl->list, nd) {
+ if (index-- <= 0)
+ break;
+ }
+ return e;
+}
+
+/*
+ * Caller must hold RTNL.
+ */
+int fib_table_flush(struct fib_table *tb)
+{
+ struct rlist *rl = (struct rlist *) tb->tb_data;
+ struct entry *ei;
+ struct hlist_node *tmp;
+ int found = 0;
+
+ hlist_for_each_entry_safe (ei, tmp, &rl->list, nd) {
+ found += flush_entry(ei);
+ if (hlist_empty(&ei->list))
+ entry_remove(ei);
+ }
+ pr_debug("flush found=%d\n", found);
+ return found;
+}
+
+void fib_free_table(struct fib_table *tb)
+{
+ kfree(tb);
+}
+
+static int fn_list_dump_fa(u32 key, int plen, struct list_head *fah,
+ struct fib_table *tb,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int i, s_i;
+ struct fib_alias *fa;
+ __be32 xkey = htonl(key);
+
+ s_i = cb->args[5];
+ i = 0;
+
+ /* rcu_read_lock is hold by caller */
+
+ list_for_each_entry_rcu(fa, fah, fa_list) {
+ if (i < s_i) {
+ i++;
+ continue;
+ }
+
+ if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWROUTE,
+ tb->tb_id,
+ fa->fa_type,
+ xkey,
+ plen,
+ fa->fa_tos,
+ fa->fa_info, NLM_F_MULTI) < 0) {
+ cb->args[5] = i;
+ return -1;
+ }
+ i++;
+ }
+ cb->args[5] = i;
+ return skb->len;
+}
+
+static int fn_dump_entry(struct entry *l, struct fib_table *tb,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct entry_info *li;
+ int i, s_i;
+
+ s_i = cb->args[4];
+ i = 0;
+
+ /* rcu_read_lock is hold by caller */
+ hlist_for_each_entry_rcu(li, &l->list, hlist) {
+ if (i < s_i) {
+ i++;
+ continue;
+ }
+
+ if (i > s_i)
+ cb->args[5] = 0;
+
+ if (list_empty(&li->falh))
+ continue;
+
+ if (fn_list_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
+ cb->args[4] = i;
+ return -1;
+ }
+ i++;
+ }
+
+ cb->args[4] = i;
+ return skb->len;
+}
+
+int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct entry *l;
+ struct rlist *rl = (struct rlist *) tb->tb_data;
+ u32 key = cb->args[2];
+ int count = cb->args[3];
+
+ rcu_read_lock();
+ if (count == 0)
+ l = first_entry(rl);
+ else {
+ /* Normally, continue from last key, but if that is missing
+ * fallback to using slow rescan
+ */
+ l = fib_find_node(rl, key);
+ if (!l)
+ l = entry_index(rl, count);
+ }
+
+ while (l) {
+ cb->args[2] = l->key;
+ if (fn_dump_entry(l, tb, skb, cb) < 0) {
+ cb->args[3] = count;
+ rcu_read_unlock();
+ return -1;
+ }
+
+ ++count;
+ l = next_entry(l);
+ memset(&cb->args[4], 0,
+ sizeof(cb->args) - 4*sizeof(cb->args[0]));
+ }
+ cb->args[3] = count;
+ rcu_read_unlock();
+
+ return skb->len;
+}
+
+/* new name? */
+struct fib_table *fib_trie_table(u32 id)
+{
+ struct fib_table *tb;
+
+ tb = kzalloc(sizeof(struct fib_table) + sizeof(struct rtable),
+ GFP_KERNEL);
+ if (tb == NULL)
+ return NULL;
+
+ tb->tb_id = id;
+ tb->tb_default = -1;
+ tb->tb_num_default = 0;
+
+ return tb;
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct fib_route_iter {
+ struct seq_net_private p;
+ struct rlist *rl;
+ loff_t pos;
+ u32 key;
+};
+
+static struct entry *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
+{
+ struct entry *e = NULL;
+
+ /* use cache location of last found key */
+ if (iter->pos > 0 && pos >= iter->pos && (e = fib_find_node(iter->rl, iter->key)) != NULL)
+ pos -= iter->pos;
+ else {
+ iter->pos = 0;
+ e = first_entry(iter->rl);
+ }
+
+ while (e && pos-- > 0) {
+ iter->pos++;
+ e = next_entry(e);
+ }
+
+ if (e)
+ iter->key = pos; /* remember it */
+ else
+ iter->pos = 0; /* forget it */
+
+ return e;
+}
+
+static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ struct fib_route_iter *iter = seq->private;
+ struct fib_table *tb;
+
+ rcu_read_lock();
+ tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
+ if (!tb)
+ return NULL;
+
+ iter->rl = (struct rlist *) tb->tb_data;
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+ else
+ return fib_route_get_idx(iter, *pos - 1);
+}
+
+static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct fib_route_iter *iter = seq->private;
+ struct entry *e = v;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN) {
+ iter->pos = 0;
+ e = first_entry(iter->rl);
+ } else {
+ iter->pos++;
+ e = next_entry(e);
+ }
+
+ if (e)
+ iter->key = e->key;
+ else
+ iter->pos = 0;
+ return e;
+}
+
+static void fib_route_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+{
+ unsigned int flags = 0;
+
+ if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
+ flags = RTF_REJECT;
+ if (fi && fi->fib_nh->nh_gw)
+ flags |= RTF_GATEWAY;
+ if (mask == htonl(0xFFFFFFFF))
+ flags |= RTF_HOST;
+ flags |= RTF_UP;
+ return flags;
+}
+
+/* This outputs /proc/net/route.
+ * The format of the file is not supposed to be changed
+ * and needs to be same as fib_hash output to avoid breaking
+ * legacy utilities.
+ */
+static int fib_route_seq_show(struct seq_file *seq, void *v)
+{
+ struct entry *entry = v;
+ struct entry_info *ei;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
+ "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
+ "\tWindow\tIRTT");
+ return 0;
+ }
+
+ hlist_for_each_entry_rcu(ei, &entry->list, hlist) {
+ struct fib_alias *fa;
+ __be32 mask, prefix;
+
+ mask = inet_make_mask(ei->plen);
+ prefix = htonl(entry->key);
+
+ list_for_each_entry_rcu(fa, &ei->falh, fa_list) {
+ const struct fib_info *fi = fa->fa_info;
+ unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
+
+ if (fa->fa_type == RTN_BROADCAST
+ || fa->fa_type == RTN_MULTICAST)
+ continue;
+
+ seq_setwidth(seq, 127);
+
+ if (fi)
+ seq_printf(seq,
+ "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
+ "%d\t%08X\t%d\t%u\t%u",
+ fi->fib_dev ? fi->fib_dev->name : "*",
+ prefix,
+ fi->fib_nh->nh_gw, flags, 0, 0,
+ fi->fib_priority,
+ mask,
+ (fi->fib_advmss ?
+ fi->fib_advmss + 40 : 0),
+ fi->fib_window,
+ fi->fib_rtt >> 3);
+ else
+ seq_printf(seq,
+ "*\t%08X\t%08X\t%04X\t%d\t%u\t"
+ "%d\t%08X\t%d\t%u\t%u",
+ prefix, 0, flags, 0, 0, 0,
+ mask, 0, 0, 0);
+
+ seq_pad(seq, '\n');
+ }
+ }
+
+ return 0;
+}
+
+static const struct seq_operations fib_route_seq_ops = {
+ .start = fib_route_seq_start,
+ .next = fib_route_seq_next,
+ .stop = fib_route_seq_stop,
+ .show = fib_route_seq_show,
+};
+
+static int fib_route_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &fib_route_seq_ops,
+ sizeof(struct fib_route_iter));
+}
+
+static const struct file_operations fib_route_fops = {
+ .owner = THIS_MODULE,
+ .open = fib_route_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+int __net_init fib_proc_init(struct net *net)
+{
+ if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void __net_exit fib_proc_exit(struct net *net)
+{
+ remove_proc_entry("route", net->proc_net);
+}
+
+#endif /* CONFIG_PROC_FS */
+
+void __init fib_trie_init(void) {}
--
1.9.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/