[RFC PATCH 11/16] bpf/crib: Introduce skb open-coded iterator kfuncs

From: Juntong Deng
Date: Wed Jul 10 2024 - 14:47:17 EST


This patch adds open-coded iterator style socket queue skb iterator
kfuncs bpf_iter_skb_{new,next,destroy} that iterate over all skbs
(struct sk_buff) in the specified socket queue (struct sk_buff_head).

The reference to struct sk_buff acquired by the previous
bpf_iter_skb_next() is released in the next bpf_iter_skb_next(),
and the last reference is released in the last bpf_iter_skb_next()
that returns NULL.

In the bpf_iter_skb_destroy(), if the iterator does not iterate to the end,
then the last struct sk_buff reference is released at this time.

Signed-off-by: Juntong Deng <juntong.deng@xxxxxxxxxxx>
---
include/linux/bpf_crib.h | 9 ++++
kernel/bpf/crib/bpf_checkpoint.c | 79 ++++++++++++++++++++++++++++++++
kernel/bpf/crib/bpf_crib.c | 4 ++
3 files changed, 92 insertions(+)

diff --git a/include/linux/bpf_crib.h b/include/linux/bpf_crib.h
index 468ae87fa1a5..e7cfa9c1ae6b 100644
--- a/include/linux/bpf_crib.h
+++ b/include/linux/bpf_crib.h
@@ -23,4 +23,13 @@ struct bpf_iter_task_file_kern {
int fd;
} __aligned(8);

+struct bpf_iter_skb {
+ __u64 __opaque[2]; /* opaque storage; accessed as struct bpf_iter_skb_kern */
+} __aligned(8);
+
+struct bpf_iter_skb_kern {
+ struct sk_buff_head *head; /* queue being walked; set by bpf_iter_skb_new() */
+ struct sk_buff *skb; /* skb last returned by bpf_iter_skb_next(), or NULL */
+} __aligned(8);
+
#endif /* _BPF_CRIB_H */
diff --git a/kernel/bpf/crib/bpf_checkpoint.c b/kernel/bpf/crib/bpf_checkpoint.c
index d8cd4a1b73dc..c95844faecbc 100644
--- a/kernel/bpf/crib/bpf_checkpoint.c
+++ b/kernel/bpf/crib/bpf_checkpoint.c
@@ -14,6 +14,10 @@

extern void bpf_file_release(struct file *file);

+extern struct sk_buff *bpf_skb_acquire(struct sk_buff *skb);
+
+extern void bpf_skb_release(struct sk_buff *skb);
+
__bpf_kfunc_start_defs();

/**
@@ -162,4 +166,79 @@ __bpf_kfunc int bpf_cal_skb_size(struct sk_buff *skb)
return skb_end_offset(skb) + skb->data_len;
}

+/**
+ * bpf_iter_skb_new() - Set up an skb iterator over a socket queue
+ *
+ * Stores @head and clears the cursor; no skb is referenced yet. Returns 0.
+ *
+ * @it: The bpf_iter_skb to initialize
+ * @head: The sk_buff_head (socket queue) to be iterated over
+ */
+__bpf_kfunc int bpf_iter_skb_new(struct bpf_iter_skb *it,
+				 struct sk_buff_head *head)
+{
+	struct bpf_iter_skb_kern *state = (void *)it;
+
+	BUILD_BUG_ON(sizeof(*state) != sizeof(*it));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_skb_kern) != __alignof__(struct bpf_iter_skb));
+
+	state->skb = NULL;
+	state->head = head;
+
+	return 0;
+}
+
+/**
+ * bpf_iter_skb_next() - Get the next skb in bpf_iter_skb
+ *
+ * Acquires a reference to the returned struct sk_buff. The reference taken
+ * by the previous call is released here, but only after the next skb has
+ * been looked up and referenced under the queue lock; the last reference
+ * is released by the call that returns NULL.
+ *
+ * @it: bpf_iter_skb to be advanced
+ * @returns the next struct sk_buff in the queue, or NULL if there is none.
+ */
+__bpf_kfunc struct sk_buff *bpf_iter_skb_next(struct bpf_iter_skb *it)
+{
+	struct bpf_iter_skb_kern *kit = (void *)it;
+	struct sk_buff *prev = kit->skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kit->head->lock, flags);
+
+	if (!prev)
+		kit->skb = skb_peek(kit->head);
+	else
+		kit->skb = skb_peek_next(prev, kit->head);
+
+	/* Reference the new skb before unlocking, not after it may be gone. */
+	if (kit->skb)
+		bpf_skb_acquire(kit->skb);
+
+	spin_unlock_irqrestore(&kit->head->lock, flags);
+
+	/* prev's list links were read above, so it is safe to drop it now. */
+	if (prev)
+		bpf_skb_release(prev);
+
+	return kit->skb;
+}
+
+/**
+ * bpf_iter_skb_destroy() - Tear down a bpf_iter_skb
+ *
+ * Releases the reference to the current skb if the iterator still holds
+ * one, i.e. iteration stopped before bpf_iter_skb_next() returned NULL.
+ *
+ * @it: bpf_iter_skb to be destroyed
+ */
+__bpf_kfunc void bpf_iter_skb_destroy(struct bpf_iter_skb *it)
+{
+	struct sk_buff *held = ((struct bpf_iter_skb_kern *)it)->skb;
+
+	if (held)
+		bpf_skb_release(held);
+}
+
__bpf_kfunc_end_defs();
diff --git a/kernel/bpf/crib/bpf_crib.c b/kernel/bpf/crib/bpf_crib.c
index 21889efa620c..fda34d8143f1 100644
--- a/kernel/bpf/crib/bpf_crib.c
+++ b/kernel/bpf/crib/bpf_crib.c
@@ -289,6 +289,10 @@ BTF_ID_FLAGS(func, bpf_skb_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cal_skb_size, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_skb_peek_tail, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)

+BTF_ID_FLAGS(func, bpf_iter_skb_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_iter_skb_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_skb_destroy, KF_ITER_DESTROY)
+
BTF_KFUNCS_END(bpf_crib_kfuncs)

static int bpf_prog_run_crib(struct bpf_prog *prog,
--
2.39.2