Re: [PATCH bpf-next v4 4/8] bpf: Introduce css open-coded iterator kfuncs

From: Chuyi Zhou
Date: Wed Oct 11 2023 - 01:32:36 EST




在 2023/10/11 12:44, Chuyi Zhou 写道:


在 2023/10/7 20:45, Chuyi Zhou 写道:
This patch adds kfuncs bpf_iter_css_{new,next,destroy} which allow
creation and manipulation of struct bpf_iter_css in open-coded iterator
style. These kfuncs actually wrap css_next_descendant_{pre, post}.
css_iter can be used to:

1) iterate over a specific cgroup tree in pre/post/up order

2) iterate over a cgroup_subsystem in a BPF prog, like
for_each_mem_cgroup_tree/cpuset_for_each_descendant_pre in the kernel.

The API design is consistent with cgroup_iter. bpf_iter_css_new accepts
parameters defining iteration order and starting css. Here we also reuse
BPF_CGROUP_ITER_DESCENDANTS_PRE, BPF_CGROUP_ITER_DESCENDANTS_POST,
BPF_CGROUP_ITER_ANCESTORS_UP enums.

Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx>
Acked-by: Tejun Heo <tj@xxxxxxxxxx>
---
  kernel/bpf/cgroup_iter.c                      | 59 +++++++++++++++++++
  kernel/bpf/helpers.c                          |  3 +
  .../testing/selftests/bpf/bpf_experimental.h  |  6 ++
  3 files changed, 68 insertions(+)

diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index 810378f04fbc..9c6ad892ae82 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -294,3 +294,62 @@ static int __init bpf_cgroup_iter_init(void)
  }
  late_initcall(bpf_cgroup_iter_init);
+
+struct bpf_iter_css {
+    __u64 __opaque[3];
+} __attribute__((aligned(8)));
+
+struct bpf_iter_css_kern {
+    struct cgroup_subsys_state *start;
+    struct cgroup_subsys_state *pos;
+    unsigned int flags;
+} __attribute__((aligned(8)));
+
+__bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
+        struct cgroup_subsys_state *start, unsigned int flags)
+{
+    struct bpf_iter_css_kern *kit = (void *)it;
+
+    BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) != sizeof(struct bpf_iter_css));
+    BUILD_BUG_ON(__alignof__(struct bpf_iter_css_kern) != __alignof__(struct bpf_iter_css));
+

This causes the netdev/build_32bit CI to fail (https://netdev.bots.linux.dev/static/nipa/790929/13412333/build_32bit/stderr):

tools/testing/selftests/kvm/settings: warning: ignored by one of the .gitignore files
../kernel/bpf/cgroup_iter.c:308:17: warning: no previous prototype for ‘bpf_iter_css_new’ [-Wmissing-prototypes]
  308 | __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
      |                 ^~~~~~~~~~~~~~~~
../kernel/bpf/cgroup_iter.c:332:41: warning: no previous prototype for ‘bpf_iter_css_next’ [-Wmissing-prototypes]
  332 | __bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it)
      |                                         ^~~~~~~~~~~~~~~~~
../kernel/bpf/cgroup_iter.c:353:18: warning: no previous prototype for ‘bpf_iter_css_destroy’ [-Wmissing-prototypes]
  353 | __bpf_kfunc void bpf_iter_css_destroy(struct bpf_iter_css *it)
      |                  ^~~~~~~~~~~~~~~~~~~~
In file included from <command-line>:
../kernel/bpf/cgroup_iter.c: In function ‘bpf_iter_css_new’:
./../include/linux/compiler_types.h:425:45: error: call to ‘__compiletime_assert_322’ declared with attribute error: BUILD_BUG_ON failed: sizeof(struct bpf_iter_css_kern) != sizeof(struct bpf_iter_css)
  425 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |                                             ^
./../include/linux/compiler_types.h:406:25: note: in definition of macro ‘__compiletime_assert’
  406 |                         prefix ## suffix();         \
      |                         ^~~~~~
./../include/linux/compiler_types.h:425:9: note: in expansion of macro ‘_compiletime_assert’
  425 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |         ^~~~~~~~~~~~~~~~~~~
../include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’
   39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
      |                                     ^~~~~~~~~~~~~~~~~~
../include/linux/build_bug.h:50:9: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
   50 |         BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
      |         ^~~~~~~~~~~~~~~~
../kernel/bpf/cgroup_iter.c:313:9: note: in expansion of macro ‘BUILD_BUG_ON’
  313 |         BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) != sizeof(struct bpf_iter_css));


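The reason seems to be that on a 32-bit machine, sizeof(struct bpf_iter_css) is 24 while sizeof(struct bpf_iter_css_kern) is only 16 (two 4-byte pointers plus a 4-byte unsigned int, padded up to the 8-byte alignment).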

I was wondering whether the BUILD_BUG_ON check is necessary. Looking at struct bpf_list_node and struct bpf_list_node_kern, which are very similar to bpf_iter_css, I didn't see a BUILD_BUG_ON check when converting from (struct bpf_list_node *) to (struct bpf_list_node_kern *).

/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
struct bpf_list_node_kern {
    struct list_head list_head;
    void *owner;
} __attribute__((aligned(8)));

struct bpf_list_node {
    __u64 :64;
    __u64 :64;
    __u64 :64;
} __attribute__((aligned(8)));

__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
                    struct bpf_list_node *node,
                    void *meta__ign, u64 off)
{
    struct bpf_list_node_kern *n = (void *)node;

}


Or we can relax the BUILD_BUG_ON check so that it only requires the kernel struct to fit inside the opaque one, as is done for bpf_timer_kern in bpf_timer_init:

--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -310,7 +310,7 @@ __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
{
struct bpf_iter_css_kern *kit = (void *)it;

- BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) != sizeof(struct bpf_iter_css));
+ BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) > sizeof(struct bpf_iter_css));
BUILD_BUG_ON(__alignof__(struct bpf_iter_css_kern) != __alignof__(struct bpf_iter_css));

kit->start = NULL;
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 773be9a221f5..0772545568f1 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -877,7 +877,7 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
{
struct bpf_iter_task_kern *kit = (void *)it;

- BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) != sizeof(struct bpf_iter_task));
+ BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) > sizeof(struct bpf_iter_task));
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
__alignof__(struct bpf_iter_task));