Re: [PATCH bpf-next v5 4/5] bpf: verifier: Support eliding map lookup nullness

From: Kumar Kartikeya Dwivedi
Date: Fri Dec 13 2024 - 18:10:59 EST


On Fri, 13 Dec 2024 at 00:24, Daniel Xu <dxu@xxxxxxxxx> wrote:
>
> This commit allows progs to elide a null check on statically known map
> lookup keys. In other words, if the verifier can statically prove that
> the lookup will be in-bounds, allow the prog to drop the null check.
>
> This is useful for two reasons:
>
> 1. Large numbers of nullness checks (especially when they cannot fail)
> unnecessarily pushes prog towards BPF_COMPLEXITY_LIMIT_JMP_SEQ.
> 2. It forms a tighter contract between programmer and verifier.
>
> For (1), bpftrace is starting to make heavier use of percpu scratch
> maps. As a result, for user scripts with large number of unrolled loops,
> we are starting to hit jump complexity verification errors. These
> percpu lookups cannot fail anyways, as we only use static key values.
> Eliding nullness probably results in less work for verifier as well.
>
> For (2), percpu scratch maps are often used as a larger stack, as the
> currrent stack is limited to 512 bytes. In these situations, it is
> desirable for the programmer to express: "this lookup should never fail,
> and if it does, it means I messed up the code". By omitting the null
> check, the programmer can "ask" the verifier to double check the logic.
>
> Tests also have to be updated in sync with these changes, as the
> verifier is more efficient with this change. Notable, iters.c tests had
> to be changed to use a map type that still requires null checks, as it's
> exercising verifier tracking logic w.r.t iterators.
>
> Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
> ---
> kernel/bpf/verifier.c | 80 ++++++++++++++++++-
> tools/testing/selftests/bpf/progs/iters.c | 14 ++--
> .../selftests/bpf/progs/map_kptr_fail.c | 2 +-
> .../selftests/bpf/progs/verifier_map_in_map.c | 2 +-
> .../testing/selftests/bpf/verifier/map_kptr.c | 2 +-
> 5 files changed, 87 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 58b36cc96bd5..4947ef884a18 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -287,6 +287,7 @@ struct bpf_call_arg_meta {
> u32 ret_btf_id;
> u32 subprogno;
> struct btf_field *kptr_field;
> + s64 const_map_key;
> };
>
> struct bpf_kfunc_call_arg_meta {
> @@ -9163,6 +9164,53 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
> return 0;
> }
>
> +/* Returns constant key value if possible, else -1 */
> +static s64 get_constant_map_key(struct bpf_verifier_env *env,
> + struct bpf_reg_state *key,
> + u32 key_size)
> +{
> + struct bpf_func_state *state = func(env, key);
> + struct bpf_reg_state *reg;
> + int zero_size = 0;
> + int stack_off;
> + u8 *stype;
> + int slot;
> + int spi;
> + int i;
> +
> + if (!env->bpf_capable)
> + return -1;
> + if (key->type != PTR_TO_STACK)
> + return -1;
> + if (!tnum_is_const(key->var_off))
> + return -1;
> +
> + stack_off = key->off + key->var_off.value;
> + slot = -stack_off - 1;
> + spi = slot / BPF_REG_SIZE;
> +
> + /* First handle precisely tracked STACK_ZERO, up to BPF_REG_SIZE */
> + stype = state->stack[spi].slot_type;
> + for (i = 0; i < BPF_REG_SIZE && stype[i] == STACK_ZERO; i++)
> + zero_size++;
> + if (zero_size == key_size)
> + return 0;
> +
> + if (!is_spilled_reg(&state->stack[spi]))
> + /* Not pointer to stack */
> + return -1;
> +
> + reg = &state->stack[spi].spilled_ptr;
> + if (reg->type != SCALAR_VALUE)
> + /* Only scalars are valid array map keys */
> + return -1;
> + else if (!tnum_is_const(reg->var_off))
> + /* Stack value not statically known */
> + return -1;
> +
> + return reg->var_off.value;
> +}
> +
> static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
> struct bpf_call_arg_meta *meta,
> const struct bpf_func_proto *fn,
> @@ -9173,6 +9221,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
> enum bpf_arg_type arg_type = fn->arg_type[arg];
> enum bpf_reg_type type = reg->type;
> u32 *arg_btf_id = NULL;
> + u32 key_size;
> int err = 0;
> bool mask;
>
> @@ -9307,8 +9356,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
> verbose(env, "invalid map_ptr to access map->key\n");
> return -EACCES;
> }
> - err = check_helper_mem_access(env, regno, meta->map_ptr->key_size,
> - BPF_READ, false, NULL);
> + key_size = meta->map_ptr->key_size;
> + err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
> + if (err)
> + return err;
> + meta->const_map_key = get_constant_map_key(env, reg, key_size);
> break;
> case ARG_PTR_TO_MAP_VALUE:
> if (type_may_be_null(arg_type) && register_is_null(reg))
> @@ -10833,6 +10885,21 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
> state->callback_subprogno == subprogno);
> }
>
> +/* Returns whether or not the given map type can potentially elide
> + * lookup return value nullness check. This is possible if the key
> + * is statically known.
> + */
> +static bool can_elide_value_nullness(enum bpf_map_type type)
> +{
> + switch (type) {
> + case BPF_MAP_TYPE_ARRAY:
> + case BPF_MAP_TYPE_PERCPU_ARRAY:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
> const struct bpf_func_proto **ptr)
> {
> @@ -11199,10 +11266,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
> "kernel subsystem misconfigured verifier\n");
> return -EINVAL;
> }
> +
> + if (func_id == BPF_FUNC_map_lookup_elem &&
> + can_elide_value_nullness(meta.map_ptr->map_type) &&
> + meta.const_map_key >= 0 &&
> + meta.const_map_key < meta.map_ptr->max_entries)
> + ret_flag &= ~PTR_MAYBE_NULL;

I think we probably need mark_chain_precision applied on the constant
key since its concrete value is made use of here to prevent pruning on
it. If it's already happening and I missed it, I think we should
atleast add a comment.

For context of a similar case with tail calls, see commit
cc52d9140aa9 ("bpf: Fix record_func_key to perform backtracking on r3")
for what happens when it is missed.

> +
> regs[BPF_REG_0].map_ptr = meta.map_ptr;
> regs[BPF_REG_0].map_uid = meta.map_uid;
> regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
> - if (!type_may_be_null(ret_type) &&
> + if (!type_may_be_null(ret_flag) &&
> btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
> regs[BPF_REG_0].id = ++env->id_gen;
> }
> diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
> index 7c969c127573..190822b2f08b 100644
> --- a/tools/testing/selftests/bpf/progs/iters.c
> +++ b/tools/testing/selftests/bpf/progs/iters.c
> @@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx)
> }
>
> struct {
> - __uint(type, BPF_MAP_TYPE_ARRAY);
> + __uint(type, BPF_MAP_TYPE_HASH);
> __type(key, int);
> __type(value, int);
> __uint(max_entries, 1000);
> -} arr_map SEC(".maps");
> +} hash_map SEC(".maps");
>
> SEC("?raw_tp")
> __failure __msg("invalid mem access 'scalar'")
> @@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx)
>
> MY_PID_GUARD();
>
> - map_val = bpf_map_lookup_elem(&arr_map, &key);
> + map_val = bpf_map_lookup_elem(&hash_map, &key);
> if (!map_val)
> return 0;
>
> @@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx)
>
> MY_PID_GUARD();
>
> - map_val = bpf_map_lookup_elem(&arr_map, &key);
> + map_val = bpf_map_lookup_elem(&hash_map, &key);
> if (!map_val)
> return 0;
>
> bpf_repeat(1000000) {
> - map_val = bpf_map_lookup_elem(&arr_map, &key);
> + map_val = bpf_map_lookup_elem(&hash_map, &key);
> }
>
> *map_val = 123;
> @@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx)
> MY_PID_GUARD();
>
> bpf_repeat(1000000) {
> - map_val = bpf_map_lookup_elem(&arr_map, &key);
> + map_val = bpf_map_lookup_elem(&hash_map, &key);
> found = true;
> }
>
> @@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx)
> MY_PID_GUARD();
>
> bpf_repeat(1000000) {
> - map_val = bpf_map_lookup_elem(&arr_map, &key);
> + map_val = bpf_map_lookup_elem(&hash_map, &key);
> if (map_val) {
> found = true;
> break;
> diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
> index c2a6bd392e48..4c0ff01f1a96 100644
> --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
> +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
> @@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
> }
>
> SEC("?tc")
> -__failure __msg("Unreleased reference id=5 alloc_insn=")
> +__failure __msg("Unreleased reference id=4 alloc_insn=")
> int kptr_xchg_ref_state(struct __sk_buff *ctx)
> {
> struct prog_test_ref_kfunc *p;
> diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
> index 4eaab1468eb7..7d088ba99ea5 100644
> --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
> +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
> @@ -47,7 +47,7 @@ l0_%=: r0 = 0; \
>
> SEC("xdp")
> __description("map in map state pruning")
> -__success __msg("processed 26 insns")
> +__success __msg("processed 15 insns")
> __log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
> __naked void map_in_map_state_pruning(void)
> {
> diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
> index f420c0312aa0..4b39f8472f9b 100644
> --- a/tools/testing/selftests/bpf/verifier/map_kptr.c
> +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
> @@ -373,7 +373,7 @@
> .prog_type = BPF_PROG_TYPE_SCHED_CLS,
> .fixup_map_kptr = { 1 },
> .result = REJECT,
> - .errstr = "Unreleased reference id=5 alloc_insn=20",
> + .errstr = "Unreleased reference id=4 alloc_insn=20",
> .fixup_kfunc_btf_id = {
> { "bpf_kfunc_call_test_acquire", 15 },
> }
> --
> 2.46.0
>
>