Re: [PATCH bpf-next v3 5/7] bpf: teach the verifier to enforce css_iter and task_iter in RCU CS

From: Yafang Shao
Date: Wed Sep 27 2023 - 06:01:02 EST


On Mon, Sep 25, 2023 at 6:56 PM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote:
>
> css_iter and task_iter should be used in rcu section. Specifically, in
> sleepable progs explicit bpf_rcu_read_lock() is needed before use these
> iters. In normal bpf progs that have implicit rcu_read_lock(), it's OK to
> use them directly.
>
> This patch adds a new a KF flag KF_RCU_PROTECTED for bpf_iter_task_new and
> bpf_iter_css_new. It means the kfunc should be used in RCU CS. We check
> whether we are in rcu cs before we want to invoke this kfunc. If the rcu
> protection is guaranteed, we would let st->type = PTR_TO_STACK | MEM_RCU.
> Once user do rcu_unlock during the iteration, state MEM_RCU of regs would
> be cleared. is_iter_reg_valid_init() will reject if reg->type is UNTRUSTED.
>
> It is worth noting that currently, bpf_rcu_read_unlock does not
> clear the state of the STACK_ITER reg, since bpf_for_each_spilled_reg
> only considers STACK_SPILL. This patch also let bpf_for_each_spilled_reg
> search STACK_ITER.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx>

This patch should be ahead of patch #2 and you introduce
KF_RCU_PROTECTED in it then use this flag in later patches.
BTW, I can't apply your series on bpf-next. I think you should rebase
it on the latest bpf-next, otherwise the BPF CI can't be triggered.

> ---
> include/linux/bpf_verifier.h | 19 ++++++++------
> include/linux/btf.h | 1 +
> kernel/bpf/helpers.c | 4 +--
> kernel/bpf/verifier.c | 48 +++++++++++++++++++++++++++---------
> 4 files changed, 50 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index a3236651ec64..b5cdcc332b0a 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -385,19 +385,18 @@ struct bpf_verifier_state {
> u32 jmp_history_cnt;
> };
>
> -#define bpf_get_spilled_reg(slot, frame) \
> +#define bpf_get_spilled_reg(slot, frame, mask) \
> (((slot < frame->allocated_stack / BPF_REG_SIZE) && \
> - (frame->stack[slot].slot_type[0] == STACK_SPILL)) \
> + ((1 << frame->stack[slot].slot_type[0]) & (mask))) \
> ? &frame->stack[slot].spilled_ptr : NULL)
>
> /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
> -#define bpf_for_each_spilled_reg(iter, frame, reg) \
> - for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \
> +#define bpf_for_each_spilled_reg(iter, frame, reg, mask) \
> + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame, mask); \
> iter < frame->allocated_stack / BPF_REG_SIZE; \
> - iter++, reg = bpf_get_spilled_reg(iter, frame))
> + iter++, reg = bpf_get_spilled_reg(iter, frame, mask))
>
> -/* Invoke __expr over regsiters in __vst, setting __state and __reg */
> -#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
> +#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \
> ({ \
> struct bpf_verifier_state *___vstate = __vst; \
> int ___i, ___j; \
> @@ -409,7 +408,7 @@ struct bpf_verifier_state {
> __reg = &___regs[___j]; \
> (void)(__expr); \
> } \
> - bpf_for_each_spilled_reg(___j, __state, __reg) { \
> + bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \
> if (!__reg) \
> continue; \
> (void)(__expr); \
> @@ -417,6 +416,10 @@ struct bpf_verifier_state {
> } \
> })
>
> +/* Invoke __expr over regsiters in __vst, setting __state and __reg */
> +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
> + bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, 1 << STACK_SPILL, __expr)
> +
> /* linked list of verifier states used to prune search */
> struct bpf_verifier_state_list {
> struct bpf_verifier_state state;
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 928113a80a95..c2231c64d60b 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -74,6 +74,7 @@
> #define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */
> #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */
> #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
> +#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */
>
> /*
> * Tag marking a kernel function as a kfunc. This is meant to minimize the
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 9c3af36249a2..aa9e03fbfe1a 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2507,10 +2507,10 @@ BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
> BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
> BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL)
> BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
> -BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
> BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
> BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
> -BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
> +BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
> BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL)
> BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
> BTF_ID_FLAGS(func, bpf_dynptr_adjust)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2367483bf4c2..a065e18a0b3a 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1172,7 +1172,12 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
>
> static void __mark_reg_known_zero(struct bpf_reg_state *reg);
>
> +static bool in_rcu_cs(struct bpf_verifier_env *env);
> +
> +static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
> +
> static int mark_stack_slots_iter(struct bpf_verifier_env *env,
> + struct bpf_kfunc_call_arg_meta *meta,
> struct bpf_reg_state *reg, int insn_idx,
> struct btf *btf, u32 btf_id, int nr_slots)
> {
> @@ -1193,6 +1198,12 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
>
> __mark_reg_known_zero(st);
> st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
> + if (is_kfunc_rcu_protected(meta)) {
> + if (in_rcu_cs(env))
> + st->type |= MEM_RCU;

I think this change is incorrect. The type of st->type is enum
bpf_reg_type, but MEM_RCU is enum bpf_type_flag.
Or am I missing something?

> + else
> + st->type |= PTR_UNTRUSTED;
> + }
> st->live |= REG_LIVE_WRITTEN;
> st->ref_obj_id = i == 0 ? id : 0;
> st->iter.btf = btf;
> @@ -1267,7 +1278,7 @@ static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
> return true;
> }
>
> -static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
> +static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
> struct btf *btf, u32 btf_id, int nr_slots)
> {
> struct bpf_func_state *state = func(env, reg);
> @@ -1275,26 +1286,28 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_
>
> spi = iter_get_spi(env, reg, nr_slots);
> if (spi < 0)
> - return false;
> + return -EINVAL;
>
> for (i = 0; i < nr_slots; i++) {
> struct bpf_stack_state *slot = &state->stack[spi - i];
> struct bpf_reg_state *st = &slot->spilled_ptr;
>
> + if (st->type & PTR_UNTRUSTED)
> + return -EPERM;
> /* only main (first) slot has ref_obj_id set */
> if (i == 0 && !st->ref_obj_id)
> - return false;
> + return -EINVAL;
> if (i != 0 && st->ref_obj_id)
> - return false;
> + return -EINVAL;
> if (st->iter.btf != btf || st->iter.btf_id != btf_id)
> - return false;
> + return -EINVAL;
>
> for (j = 0; j < BPF_REG_SIZE; j++)
> if (slot->slot_type[j] != STACK_ITER)
> - return false;
> + return -EINVAL;
> }
>
> - return true;
> + return 0;
> }
>
> /* Check if given stack slot is "special":
> @@ -7503,15 +7516,20 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
> return err;
> }
>
> - err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
> + err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
> if (err)
> return err;
> } else {
> /* iter_next() or iter_destroy() expect initialized iter state*/
> - if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
> - verbose(env, "expected an initialized iter_%s as arg #%d\n",
> + err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
> + switch (err) {
> + case -EINVAL:
> + verbose(env, "expected an initialized iter_%s as arg #%d or without bpf_rcu_read_lock()\n",
> iter_type_str(meta->btf, btf_id), regno);
> - return -EINVAL;
> + return err;
> + case -EPERM:
> + verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
> + return err;
> }
>
> spi = iter_get_spi(env, reg, nr_slots);
> @@ -10092,6 +10110,11 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
> return meta->kfunc_flags & KF_RCU;
> }
>
> +static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
> +{
> + return meta->kfunc_flags & KF_RCU_PROTECTED;
> +}
> +
> static bool __kfunc_param_match_suffix(const struct btf *btf,
> const struct btf_param *arg,
> const char *suffix)
> @@ -11428,6 +11451,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> if (env->cur_state->active_rcu_lock) {
> struct bpf_func_state *state;
> struct bpf_reg_state *reg;
> + u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
>
> if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
> verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
> @@ -11438,7 +11462,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
> return -EINVAL;
> } else if (rcu_unlock) {
> - bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
> + bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
> if (reg->type & MEM_RCU) {
> reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
> reg->type |= PTR_UNTRUSTED;
> --
> 2.20.1
>
>


--
Regards
Yafang