Re: [PATCH v4 bpf-next 2/2] selftests/bpf: add dump type data tests to btf dump tests

From: Andrii Nakryiko
Date: Fri Jun 04 2021 - 22:51:37 EST


On Tue, Jun 1, 2021 at 7:37 AM Alan Maguire <alan.maguire@xxxxxxxxxx> wrote:
>
> Test various type data dumping operations by comparing expected
> format with the dumped string; an snprintf-style printf function
> is used to record the string dumped. Also verify overflow handling
> where the data passed does not cover the full size of a type,
> such as would occur if a tracer has a portion of the 8k
> "struct task_struct".
>
> Signed-off-by: Alan Maguire <alan.maguire@xxxxxxxxxx>
> ---
> tools/testing/selftests/bpf/prog_tests/btf_dump.c | 638 ++++++++++++++++++++++
> 1 file changed, 638 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> index 1b90e68..b78c308 100644
> --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
> @@ -232,6 +232,642 @@ void test_btf_dump_incremental(void)
> btf__free(btf);
> }
>
> +#define STRSIZE 4096
> +
> +void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)

static

> +{
> + char *s = ctx, new[STRSIZE];
> +
> + vsnprintf(new, STRSIZE, fmt, args);
> + strncat(s, new, STRSIZE);
> +}
> +
> +/* skip "enum "/"struct " prefixes */
> +#define SKIP_PREFIX(_typestr, _prefix) \
> + do { \
> + if (strncmp(_typestr, _prefix, strlen(_prefix)) == 0) \
> + _typestr += strlen(_prefix) + 1; \
> + } while (0)
> +
> +int btf_dump_data(struct btf *btf, struct btf_dump *d,
> + char *name, __u64 flags, void *ptr,
> + size_t ptrsize, char *str, const char *expectedval)

static

naming nit: expected_val is much easier to read than expected_val,
same for ptrsize vs ptr_size; I'm also totally fine with shorter names
using common abbreviations (exp_val, ptr_sz, etc).

> +{
> + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
> + int ret = 0, cmp;
> + size_t typesize;
> + __s32 type_id;
> +
> + if (flags & BTF_F_COMPACT)
> + opts.compact = true;
> + if (flags & BTF_F_NONAME)
> + opts.skip_names = true;
> + if (flags & BTF_F_ZERO)
> + opts.emit_zeroes = true;

nothing wrong with this, but just as an FYI, you could have combined
that with DECLARE_LIBBPF_OPTS above:

DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts,
.compact = flags & BTF_F_COMPACT,
.skip_names = flags & BTF_F_NONAME,
.emit_zeroes = flags & BTF_F_ZERO,
);

> + SKIP_PREFIX(name, "enum");
> + SKIP_PREFIX(name, "struct");
> + SKIP_PREFIX(name, "union");
> + type_id = btf__find_by_name(btf, name);
> + if (CHECK(type_id <= 0, "find type id",
> + "no '%s' in BTF: %d\n", name, type_id)) {

see all the variations of ASSERT_XXX() macros, they are shorter, have
more natural checks and they output argument values automatically if
condition doesn't hold. So this one would be

if (!ASSERT_GT(type_id, 0, "type_id")) {
err = -ENOENT;
goto err;
}

> + ret = -ENOENT;
> + goto err;
> + }
> + typesize = btf__resolve_size(btf, type_id);
> + str[0] = '\0';
> + ret = btf_dump__dump_type_data(d, type_id, ptr, ptrsize, &opts);
> + if (typesize <= ptrsize) {
> + if (CHECK(ret != typesize, "btf_dump__dump_type_data",
> + "failed/unexpected typesize: %d\n", ret))
> + goto err;
> + } else {
> + if (CHECK(ret != -E2BIG, "btf_dump__dump_type_data -E2BIG",
> + "failed to return -E2BIG: %d\n", ret))
> + goto err;
> + ret = 0;
> + }
> +
> + cmp = strcmp(str, expectedval);
> + if (CHECK(cmp, "ensure expected/actual match",
> + "'%s' does not match expected '%s': %d\n",
> + str, expectedval, cmp))
> + ret = -EFAULT;

here ASSERT_STREQ() is useful

> +err:
> + if (ret < 0)
> + btf_dump__free(d);
> + return ret;
> +}
> +
> +#define TEST_BTF_DUMP_DATA(_b, _d, _str, _type, _flags, _expected, ...) \
> + do { \
> + char __ptrtype[64] = #_type; \
> + char *_ptrtype = (char *)__ptrtype; \
> + _type _ptrdata = __VA_ARGS__; \
> + void *_ptr = &_ptrdata; \
> + int _err; \
> + \
> + _err = btf_dump_data(_b, _d, _ptrtype, _flags, _ptr, \
> + sizeof(_type), _str, _expected); \
> + if (_err < 0) \
> + return _err; \
> + } while (0)
> +
> +/* Use where expected data string matches its stringified declaration */
> +#define TEST_BTF_DUMP_DATA_C(_b, _d, _str, _type, _flags, ...) \
> + TEST_BTF_DUMP_DATA(_b, _d, _str, _type, _flags, \
> + "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
> +
> +/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
> +#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _str, _type, _typesize, _expected, ...)\
> + do { \
> + char __ptrtype[64] = #_type; \
> + char *_ptrtype = (char *)__ptrtype; \
> + _type _ptrdata = __VA_ARGS__; \
> + void *_ptr = &_ptrdata; \
> + int _err; \
> + \
> + _err = btf_dump_data(_b, _d, _ptrtype, 0, _ptr, \
> + _typesize, _str, _expected); \
> + if (_err < 0) \
> + return _err; \
> + } while (0)
> +
> +#define TEST_BTF_DUMP_VAR(_b, _d, _str, _var, _type, _flags, _expected, ...) \
> + do { \
> + _type _ptrdata = __VA_ARGS__; \
> + void *_ptr = &_ptrdata; \
> + int _err; \
> + \
> + _err = btf_dump_data(_b, _d, _var, _flags, _ptr, \
> + sizeof(_type), _str, _expected); \
> + if (_err < 0) \
> + return _err; \
> + } while (0)
> +
> +int test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, char *str)

static here and in a bunch of places below

> +{
> + /* simple int */
> + TEST_BTF_DUMP_DATA_C(btf, d, str, int, BTF_F_COMPACT, 1234);
> + TEST_BTF_DUMP_DATA(btf, d, str, int, BTF_F_COMPACT | BTF_F_NONAME,
> + "1234", 1234);
> + TEST_BTF_DUMP_DATA(btf, d, str, int, 0, "(int)1234\n", 1234);

do you think it's a good idea to append \n? it seems so simple for
user to do that, if necessary; on the other hand, if user doesn't want
it, they would need to do strlen() and overwriting last character,
which seems like a hassle

> +
> + /* zero value should be printed at toplevel */
> + TEST_BTF_DUMP_DATA(btf, d, str, int, BTF_F_COMPACT, "(int)0", 0);
> + TEST_BTF_DUMP_DATA(btf, d, str, int, BTF_F_COMPACT | BTF_F_NONAME,
> + "0", 0);
> + TEST_BTF_DUMP_DATA(btf, d, str, int, BTF_F_COMPACT | BTF_F_ZERO,
> + "(int)0", 0);
> + TEST_BTF_DUMP_DATA(btf, d, str, int,
> + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
> + "0", 0);
> + TEST_BTF_DUMP_DATA_C(btf, d, str, int, BTF_F_COMPACT, -4567);
> + TEST_BTF_DUMP_DATA(btf, d, str, int, BTF_F_COMPACT | BTF_F_NONAME,
> + "-4567", -4567);
> + TEST_BTF_DUMP_DATA(btf, d, str, int, 0, "(int)-4567\n", -4567);
> +
> + TEST_BTF_DUMP_DATA_OVER(btf, d, str, int, sizeof(int)-1, "", 1);
> +
> + return 0;
> +}
> +
> +/* since the kernel does not likely have any float types in its BTF, we
> + * will need to add some of various sizes.
> + */
> +#define TEST_ADD_FLOAT(_btf, _name, _sz) \
> + do { \
> + int _err; \
> + \
> + _err = btf__add_float(_btf, _name, _sz); \
> + if (CHECK(_err < 0, "btf__add_float", \
> + "could not add float of size %d: %d", \
> + _sz, _err)) \
> + return _err; \
> + } while (0)
> +
> +#define TEST_DUMP_FLOAT(_b, _d, _str, _type, _flags, _data, _sz, \
> + _expectedval) \
> + do { \
> + int _err; \
> + \
> + _err = btf_dump_data(_b, _d, _type, _flags, \
> + _data, _sz, _str, _expectedval); \
> + if (CHECK(_err < 0, "btf_dump float", \
> + "could not dump float data: %d\n", _err)) \
> + return _err; \
> + } while (0)
> +
> +int test_btf_dump_float_data(struct btf *btf, struct btf_dump *d, char *str)
> +{
> + float t1 = 1.234567;
> + float t2 = -1.234567;
> + float t3 = 0.0;
> + double t4 = 5.678912;
> + double t5 = -5.678912;
> + double t6 = 0.0;
> + long double t7 = 9.876543;
> + long double t8 = -9.876543;
> + long double t9 = 0.0;
> +
> + TEST_ADD_FLOAT(btf, "test_float", 4);

I don't get this love for macros. It makes debugging experience much
harder. It makes following the code harder. It doesn't save much
typing at all. TEST_BTF_DUMP_DATA at least adds some convenience, but
TEST_ADD_FLOAT and TEST_DUMP_FLOAT are completely useless, see below.

> + TEST_DUMP_FLOAT(btf, d, str, "test_float", 0, &t1, 4,
> + "(test_float)1.234567\n");
> + TEST_DUMP_FLOAT(btf, d, str, "test_float", 0, &t2, 4,
> + "(test_float)-1.234567\n");
> + TEST_DUMP_FLOAT(btf, d, str, "test_float", 0, &t3, 4,
> + "(test_float)0.000000\n");

ASSERT_OK(btf_dump_data(btf, d, "test_float", 0, &t1, 4, str,
"(test_float)1.234567\n"));
ASSERT_OK(btf_dump_data(btf, d, "test_float", 0, &t2, 4, str,
"(test_float)-1.234567\n"));
ASSERT_OK(btf_dump_data(btf, d, "test_float", 0, &t3, 4, str,
"(test_float)0.000000\n"));

It even saved some lines of code.

> +
> + TEST_ADD_FLOAT(btf, "test_double", 8);
> + TEST_DUMP_FLOAT(btf, d, str, "test_double", 0, &t4, 8,
> + "(test_double)5.678912\n");
> + TEST_DUMP_FLOAT(btf, d, str, "test_double", 0, &t5, 8,
> + "(test_double)-5.678912\n");
> + TEST_DUMP_FLOAT(btf, d, str, "test_double", 0, &t6, 8,
> + "(test_double)0.000000\n");
> +

[...]

> + TEST_BTF_DUMP_DATA(btf, d, str, struct btf_enum,
> + BTF_F_COMPACT | BTF_F_NONAME,
> + "{}",
> + { .name_off = 0, .val = 0,});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct btf_enum, 0,
> + "(struct btf_enum){\n}\n",
> + { .name_off = 0, .val = 0,});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct btf_enum,
> + BTF_F_COMPACT | BTF_F_ZERO,
> + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
> + { .name_off = 0, .val = 0,});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct btf_enum,
> + BTF_F_ZERO,
> + "(struct btf_enum){\n\t.name_off = (__u32)0,\n\t.val = (__s32)0,\n}\n",

while for primitive types and enums above are expected strings are
pretty easy to follow, for structs it starts to break apart. For
instance, I find

"(struct btf_enum){\
.name_off = (__u32)0,\
.val = (__s32)0,\
}\
",

much more legible (I can mentally ignore \ at the end quite easily).
This single line \n\t stuff just gets messier for bigger structs.

> + { .name_off = 0, .val = 0,});
> +
> + /* struct with pointers */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct list_head, BTF_F_COMPACT,
> + "(struct list_head){.next = (struct list_head *)0x1,}",
> + { .next = (struct list_head *)1 });
> + TEST_BTF_DUMP_DATA(btf, d, str, struct list_head, 0,
> + "(struct list_head){\n\t.next = (struct list_head *)0x1,\n}\n",
> + { .next = (struct list_head *)1 });
> + /* NULL pointer should not be displayed */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct list_head, BTF_F_COMPACT,
> + "(struct list_head){}",
> + { .next = (struct list_head *)0 });
> + TEST_BTF_DUMP_DATA(btf, d, str, struct list_head, 0,
> + "(struct list_head){\n}\n",
> + { .next = (struct list_head *)0 });
> +
> + /* struct with function pointers */
> + type_id = btf__find_by_name(btf, "file_operations");
> + if (CHECK(type_id <= 0, "find type id",
> + "no 'struct file_operations' in BTF: %d\n", type_id))
> + return -ENOENT;
> + typesize = btf__resolve_size(btf, type_id);
> + str[0] = '\0';
> +
> + ret = btf_dump__dump_type_data(d, type_id, fops, typesize, &opts);
> + if (CHECK(ret != typesize,
> + "dump file_operations is successful",
> + "unexpected return value dumping file_operations '%s': %d\n",
> + str, ret))
> + return -EINVAL;
> +
> + cmpstr = "(struct file_operations){\n\t.owner = (struct module *)0xffffffffffffffff,\n\t.llseek = (loff_t(*)(struct file *, loff_t, int))0xffffffffffffffff,";
> + cmp = strncmp(str, cmpstr, strlen(cmpstr));
> + if (CHECK(cmp != 0, "check file_operations dump",
> + "file_operations '%s' did not match expected\n",

cmpstr logging is missing here. But I also think it's not the first
time there was a need to validate only portiong of string equality, so
I wonder if we should just add ASSERT_STRNEQ(actual, expected, len)
variant, it should be trivial to add.

> + str))
> + return -EINVAL;
> +
> + /* struct with char array */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_prog_info, BTF_F_COMPACT,
> + "(struct bpf_prog_info){.name = (char[])['f','o','o',],}",
> + { .name = "foo",});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_prog_info,
> + BTF_F_COMPACT | BTF_F_NONAME,
> + "{['f','o','o',],}",
> + {.name = "foo",});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_prog_info, 0,
> + "(struct bpf_prog_info){\n\t.name = (char[])[\n\t\t'f',\n\t\t\'o',\n\t\t'o',\n\t],\n}\n",
> + {.name = "foo",});
> + /* leading null char means do not display string */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_prog_info, BTF_F_COMPACT,
> + "(struct bpf_prog_info){}",
> + {.name = {'\0', 'f', 'o', 'o'}});
> + /* handle non-printable characters */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_prog_info, BTF_F_COMPACT,
> + "(struct bpf_prog_info){.name = (char[])[1,2,3,],}",
> + { .name = {1, 2, 3, 0}});
> +
> + /* struct with non-char array */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct __sk_buff, BTF_F_COMPACT,
> + "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}",
> + { .cb = {1, 2, 3, 4, 5,},});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct __sk_buff,
> + BTF_F_COMPACT | BTF_F_NONAME,
> + "{[1,2,3,4,5,],}",
> + { .cb = { 1, 2, 3, 4, 5},});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct __sk_buff, 0,
> + "(struct __sk_buff){\n\t.cb = (__u32[])[\n\t\t1,\n\t\t2,\n\t\t3,\n\t\t4,\n\t\t5,\n\t],\n}\n",

As you can see above in my example patch, emit_type_decl would emit
this array type as __u32[5]. I think drgn that was used as an
inspiration for this format also does that. So I think it's good to
stick to __u32[5] here.

> + { .cb = { 1, 2, 3, 4, 5},});
> + /* For non-char, arrays, show non-zero values only */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct __sk_buff, BTF_F_COMPACT,
> + "(struct __sk_buff){.cb = (__u32[])[0,0,1,0,0,],}",
> + { .cb = { 0, 0, 1, 0, 0},});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct __sk_buff, 0,
> + "(struct __sk_buff){\n\t.cb = (__u32[])[\n\t\t0,\n\t\t0,\n\t\t1,\n\t\t0,\n\t\t0,\n\t],\n}\n",
> + { .cb = { 0, 0, 1, 0, 0},});
> +
> + /* struct with bitfields */
> + TEST_BTF_DUMP_DATA_C(btf, d, str, struct bpf_insn, BTF_F_COMPACT,
> + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_insn,
> + BTF_F_COMPACT | BTF_F_NONAME,
> + "{1,0x2,0x3,4,5,}",
> + { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
> + .imm = 5,});
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_insn, 0,
> + "(struct bpf_insn){\n\t.code = (__u8)1,\n\t.dst_reg = (__u8)0x2,\n\t.src_reg = (__u8)0x3,\n\t.off = (__s16)4,\n\t.imm = (__s32)5,\n}\n",
> + {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
> +
> + /* zeroed bitfields should not be displayed */
> + TEST_BTF_DUMP_DATA(btf, d, str, struct bpf_insn, BTF_F_COMPACT,
> + "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
> + { .code = 0, .dst_reg = 1});
> +
> + /* struct with enum bitfield */
> + type_id = btf__find_by_name(btf, "nft_cmp_expr");

This nft_cmp_expr breaks our CI ([0]) because we don't build kernels
with CONFIG_NF_TABLES=y. Can you please find some other struct that
would be in a core kernel configuration? If not, you can just do the
same trick as with floats and generate your own struct with bitfields.

[0] https://travis-ci.com/github/kernel-patches/bpf/builds/227876698

> + if (CHECK(type_id <= 0, "find nft_cmp_expr",
> + "no 'struct nft_cmp_expr' in BTF: %d\n", type_id))
> + return -ENOENT;
> + typesize = btf__resolve_size(btf, type_id);
> + str[0] = '\0';
> +

[...]

> +void test_btf_dump_data(void)
> +{
> + struct btf *btf = libbpf_find_kernel_btf();
> + char str[STRSIZE];
> + struct btf_dump_opts opts = { .ctx = str };
> + struct btf_dump *d;
> +
> + if (CHECK(!btf, "get kernel BTF", "no kernel BTF found"))
> + return;
> +
> + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
> +
> + if (CHECK(!d, "new dump", "could not create BTF dump"))
> + return;
> +
> + /* Verify type display for various types. */
> + if (test_btf_dump_int_data(btf, d, str))
> + return;
> + if (test_btf_dump_float_data(btf, d, str))
> + return;
> + if (test_btf_dump_char_data(btf, d, str))
> + return;
> + if (test_btf_dump_typedef_data(btf, d, str))
> + return;
> + if (test_btf_dump_enum_data(btf, d, str))
> + return;
> + if (test_btf_dump_struct_data(btf, d, str))
> + return;
> + if (test_btf_dump_var_data(btf, d, str))
> + return;

it would be more convenient for each of those to be a subtest, and
there is no need to exit early if one of them fails, so don't return
early.

> + btf_dump__free(d);
> + btf__free(btf);
> +
> + /* verify datasec display */
> + if (test_btf_dump_datasec_data(str))
> + return;
> +
> +}
> +
> void test_btf_dump() {
> int i;
>
> @@ -245,4 +881,6 @@ void test_btf_dump() {
> }
> if (test__start_subtest("btf_dump: incremental"))
> test_btf_dump_incremental();
> + if (test__start_subtest("btf_dump: data"))

so instead of testing subtest name here, just do it for every
test_btf_dump_*_data above

> + test_btf_dump_data();
> }
> --
> 1.8.3.1
>