[RFC PATCH net-next v2 2/3] net: dst_cache: add input_dst_cache API

From: Leone Fernando
Date: Mon Feb 12 2024 - 06:33:59 EST


The input_dst_cache allows fast lookup of frequently encountered dsts.

In order to provide stable results, I implemented a simple linear
hashtable with each bucket containing a constant amount of
entries (DST_CACHE_INPUT_BUCKET_SIZE).

Similarly to how the route hint is used, I defined the hashtable key to
contain the daddr and the tos of the IP header.

Lookup is performed in a straightforward manner: start at the bucket head
corresponding the hashed key and search the following
DST_CACHE_INPUT_BUCKET_SIZE entries of the array for a matching key.

When inserting a new dst to the cache, if all the bucket entries are
full, the oldest one is deleted to make room for the new dst.

Signed-off-by: Leone Fernando <leone4fernando@xxxxxxxxx>
---
include/net/dst_cache.h | 69 +++++++++++++++++++++
net/core/dst_cache.c | 132 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 201 insertions(+)

diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index df6622a5fe98..4c5474c44cad 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -7,12 +7,40 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif
+#include <net/ip.h>
+
+#define DST_CACHE_INPUT_SHIFT (9)
+#define DST_CACHE_INPUT_SIZE (1 << DST_CACHE_INPUT_SHIFT)
+#define DST_CACHE_INPUT_BUCKET_SIZE (4)
+#define DST_CACHE_INPUT_HASH_MASK (~(DST_CACHE_INPUT_BUCKET_SIZE - 1))
+#define INVALID_DST_CACHE_INPUT_KEY (~(u64)(0))

struct dst_cache {
struct dst_cache_pcpu __percpu *cache;
unsigned long reset_ts;
};

+extern unsigned int dst_cache_net_id __read_mostly;
+
+/**
+ * idst_for_each_in_bucket - iterate over a dst cache bucket
+ * @pos: the type * to use as a loop cursor
+ * @head: the head of the cpu dst cache.
+ * @hash: the hash of the bucket
+ */
+#define idst_for_each_in_bucket(pos, head, hash) \
+ for (pos = &head[hash]; \
+ pos < &head[hash + DST_CACHE_INPUT_BUCKET_SIZE]; \
+ pos++)
+
+/**
+ * idst_for_each_in_cache - iterate over the dst cache
+ * @pos: the type * to use as a loop cursor
+ * @head: the head of the cpu dst cache.
+ */
+#define idst_for_each_in_cache(pos, head) \
+ for (pos = head; pos < head + DST_CACHE_INPUT_SIZE; pos++)
+
/**
* dst_cache_get - perform cache lookup
* @dst_cache: the cache
@@ -106,4 +134,45 @@ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
*/
void dst_cache_destroy(struct dst_cache *dst_cache);

+/**
+ * dst_cache_input_get_noref - perform lookup in the input cache,
+ * return a noref dst
+ * @dst_cache: the input cache
+ * @skb: the packet according to which the dst entry will be searched
+ * local BH must be disabled.
+ */
+struct dst_entry *dst_cache_input_get_noref(struct dst_cache *dst_cache,
+ struct sk_buff *skb);
+
+/**
+ * dst_cache_input_add - add the dst of the given skb to the input cache.
+ *
+ * in case the cache bucket is full, the oldest entry will be deleted
+ * and replaced with the new one.
+ * @dst_cache: the input cache
+ * @skb: The packet according to which the dst entry will be searched
+ *
+ * local BH must be disabled.
+ */
+void dst_cache_input_add(struct dst_cache *dst_cache,
+ const struct sk_buff *skb);
+
+/**
+ * dst_cache_input_init - initialize the input cache,
+ * allocating the required storage
+ */
+int __init dst_cache_input_init(void);
+
+static inline u64 create_dst_cache_key_ip4(const struct sk_buff *skb)
+{
+ struct iphdr *iphdr = ip_hdr(skb);
+
+ return (((u64)iphdr->daddr) << 8) | iphdr->tos;
+}
+
+static inline u32 hash_dst_cache_key(u64 key)
+{
+ return hash_64(key, DST_CACHE_INPUT_SHIFT) & DST_CACHE_INPUT_HASH_MASK;
+}
+
#endif
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 0ccfd5fa5cb9..e551b9ce5108 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -13,6 +13,7 @@
#include <net/ip6_fib.h>
#endif
#include <uapi/linux/in.h>
+#include <net/netns/generic.h>

struct dst_cache_pcpu {
unsigned long refresh_ts;
@@ -21,9 +22,12 @@ struct dst_cache_pcpu {
union {
struct in_addr in_saddr;
struct in6_addr in6_saddr;
+ u64 key;
};
};

+unsigned int dst_cache_net_id __read_mostly;
+
static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
struct dst_entry *dst, u32 cookie)
{
@@ -181,3 +185,131 @@ void dst_cache_reset_now(struct dst_cache *dst_cache)
}
}
EXPORT_SYMBOL_GPL(dst_cache_reset_now);
+
+static void dst_cache_input_set(struct dst_cache_pcpu *idst,
+ struct dst_entry *dst, u64 key)
+{
+ dst_cache_per_cpu_dst_set(idst, dst, 0);
+ idst->key = key;
+ idst->refresh_ts = jiffies;
+}
+
+static struct dst_entry *__dst_cache_input_get_noref(struct dst_cache_pcpu *idst)
+{
+ struct dst_entry *dst = idst->dst;
+
+ if (unlikely(dst->obsolete && !dst->ops->check(dst, idst->cookie))) {
+ dst_cache_input_set(idst, NULL, INVALID_DST_CACHE_INPUT_KEY);
+ goto fail;
+ }
+
+ idst->refresh_ts = jiffies;
+ return dst;
+
+fail:
+ return NULL;
+}
+
+struct dst_entry *dst_cache_input_get_noref(struct dst_cache *dst_cache,
+ struct sk_buff *skb)
+{
+ struct dst_entry *out_dst = NULL;
+ struct dst_cache_pcpu *pcpu_cache;
+ struct dst_cache_pcpu *idst;
+ u32 hash;
+ u64 key;
+
+ pcpu_cache = this_cpu_ptr(dst_cache->cache);
+ key = create_dst_cache_key_ip4(skb);
+ hash = hash_dst_cache_key(key);
+ idst_for_each_in_bucket(idst, pcpu_cache, hash) {
+ if (key == idst->key) {
+ out_dst = __dst_cache_input_get_noref(idst);
+ goto out;
+ }
+ }
+out:
+ return out_dst;
+}
+
+static void dst_cache_input_reset_now(struct dst_cache *dst_cache)
+{
+ struct dst_cache_pcpu *caches;
+ struct dst_cache_pcpu *idst;
+ struct dst_entry *dst;
+ int i;
+
+ for_each_possible_cpu(i) {
+ caches = per_cpu_ptr(dst_cache->cache, i);
+ idst_for_each_in_cache(idst, caches) {
+ idst->key = INVALID_DST_CACHE_INPUT_KEY;
+ dst = idst->dst;
+ if (dst)
+ dst_release(dst);
+ }
+ }
+}
+
+static int __net_init dst_cache_input_net_init(struct net *net)
+{
+ struct dst_cache *dst_cache = net_generic(net, dst_cache_net_id);
+
+ dst_cache->cache = (struct dst_cache_pcpu __percpu *)alloc_percpu_gfp(struct dst_cache_pcpu[DST_CACHE_INPUT_SHIFT],
+ GFP_KERNEL | __GFP_ZERO);
+ if (!dst_cache->cache)
+ return -ENOMEM;
+
+ dst_cache_input_reset_now(dst_cache);
+ return 0;
+}
+
+static void __net_exit dst_cache_input_net_exit(struct net *net)
+{
+ struct dst_cache *dst_cache = net_generic(net, dst_cache_net_id);
+
+ dst_cache_input_reset_now(dst_cache);
+ free_percpu(dst_cache->cache);
+ dst_cache->cache = NULL;
+}
+
+static bool idst_empty(struct dst_cache_pcpu *idst)
+{
+ return idst->key == INVALID_DST_CACHE_INPUT_KEY;
+}
+
+void dst_cache_input_add(struct dst_cache *dst_cache, const struct sk_buff *skb)
+{
+ struct dst_cache_pcpu *entry = NULL;
+ struct dst_cache_pcpu *pcpu_cache;
+ struct dst_cache_pcpu *idst;
+ u32 hash;
+ u64 key;
+
+ pcpu_cache = this_cpu_ptr(dst_cache->cache);
+ key = create_dst_cache_key_ip4(skb);
+ hash = hash_dst_cache_key(key);
+ idst_for_each_in_bucket(idst, pcpu_cache, hash) {
+ if (idst_empty(idst)) {
+ entry = idst;
+ goto add_to_cache;
+ }
+ if (!entry || time_before(idst->refresh_ts, entry->refresh_ts))
+ entry = idst;
+ }
+
+add_to_cache:
+ dst_cache_input_set(entry, skb_dst(skb), key);
+}
+
+static struct pernet_operations dst_cache_input_ops __net_initdata = {
+ .init = dst_cache_input_net_init,
+ .exit = dst_cache_input_net_exit,
+ .id = &dst_cache_net_id,
+ .size = sizeof(struct dst_cache),
+};
+
+int __init dst_cache_input_init(void)
+{
+ return register_pernet_subsys(&dst_cache_input_ops);
+}
+subsys_initcall(dst_cache_input_init);
--
2.34.1