Re: [RFC PATCH bpf-next 00/12] bpf: Introduce static-defined tracing probe for BPF
From: Leon Hwang
Date: Mon Jun 29 2026 - 01:27:10 EST
On 29/6/26 11:27, Xu Kuohai wrote:
> On 6/29/2026 10:14 AM, Leon Hwang wrote:
>> Hi Kuohai,
>>
>> On 28/6/26 06:51, Xu Kuohai wrote:
>>> From: Xu Kuohai <xukuohai@xxxxxxxxxx>
>>>
>>> This series introduces static-defined tracing probes for BPF programs.
>>> BPF SDT (static-defined tracing) works similarly to USDT. User defines
>>
>>
>> At first glance, the SDT idea looks cool to me.
>>
>> However, what's your purpose of introducing SDT?
>>
>
> Well, the purpose is to add a dynamic, zero-overhead tracing mechanism for
> bpf, not just at function entry, but anywhere inside the prog source code.
>
It would be better to state the purpose in the cover letter in the future.
>> If to provide points in bpf progs to be traced, like tracepoints in
>> kernel functions, I think subprog+fentry is an alternative approach.
>> Comparing with SDT, subprog+fentry requires a function call at run time,
>> instead of a NOP like SDT.
>>
[...]
>>
>> Furthermore, if users don't want a function call at run time, e.g. they
>> don't want to call 'my_trace' at run time in production, they can patch
>> the callsite of 'my_trace' with NOP before loading 'xdp_prog', and drop
>> the subprog 'my_trace' in their user space application. This elimination
>> is approachable, since it is used heavily in bpfsnoop [1].
>
> Sounds like the subprog+fentry you described gives good evidence of real
> demand for dynamic tracing inside a function body.
Correct.
A subprog in an existing bpf prog can be used to inspect the prog's
runtime details, including the 'tail_call_cnt' on the stack.
An extra subprog as stub is better for dynamic tracing.
See my blog post:
https://blog.leonhw.com/post/ebpf-talk-138-debug-tailcall-bug-with-fentry/.
>
> IIUC, even though the CALL instruction at the callsite is patched to NOP at
> runtime, the argument preparation instructions - r1 = len, r2 = ctx -
> remain
Correct.
> in the callsite. For SDT, the argument preparation is recorded as metadata
> out of line, and is never executed.
So, does argument preparation require the verifier to analyze the
registers to identify the argument registers, when an SDT is defined in
a prog? What if the verifier cannot identify them?
>
> And I think SDT is cleaner and easier to use. User just declares the
> prototype
> and insert the probe, no need to hack with subprog+fentry.
>
>> However, this elimination is not easy to understand. Want me to show
>> more details about this elimination?
>>
>
> That would be appreciated, thanks.
>
/*
 * Stub subprog for the demo: the callee whose callsite is later eliminated.
 * It does nothing but hand both arguments to __sink(); in the xlated dump
 * below, each __sink() appears as a store of the argument to the stack.
 * NOTE(review): __noinline presumably keeps this a separate subprog so the
 * caller contains a real call insn -- confirm against the macro definition.
 */
static __noinline void
subprog(int len, int ret)
{
__sink(len);
__sink(ret);
}
/*
 * Demo XDP program: always returns XDP_PASS, and calls subprog(len, ret)
 * only when the IP protocol field is IPPROTO_ICMP.
 */
SEC("xdp")
int xdp_fn(struct xdp_md *ctx)
{
/* NOTE(review): ctx_ptr() is defined elsewhere; presumably it turns a ctx
 * field into a pointer -- confirm. */
struct ethhdr *eth = (struct ethhdr *)(ctx_ptr(ctx, data));
/* The IP header is taken to start right after the Ethernet header. */
struct iphdr *iph = (struct iphdr *)(eth + 1);
int len = ctx->data_end - ctx->data;
int ret = XDP_PASS;
/* Bail out if the end of the IP header lies beyond data_end. */
if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
return ret;
/* Only ICMP packets reach the subprog call. */
if (iph->protocol != IPPROTO_ICMP)
return ret;
/* NOTE(review): barrier_var() presumably stops the compiler from folding
 * 'ret' into a constant, so it is passed in a register -- confirm. */
barrier_var(ret);
/* This is the callsite that the "elimination" removes. */
subprog(len, ret);
return ret;
}
After attaching 'xdp_fn' to 'lo',
===
without the elimination:
bpftool p d x n xdp_fn
int xdp_fn(struct xdp_md * ctx):
; int xdp_fn(struct xdp_md *ctx)
0: (b7) r0 = 2
; struct ethhdr *eth = (struct ethhdr *)(ctx_ptr(ctx, data));
1: (79) r2 = *(u64 *)(r1 +0)
; int len = ctx->data_end - ctx->data;
2: (79) r1 = *(u64 *)(r1 +8)
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
3: (bf) r3 = r2
4: (07) r3 += 34
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
5: (2d) if r3 > r1 goto pc+10
6: (b7) r3 = 9
; struct iphdr *iph = (struct iphdr *)(eth + 1);
7: (bf) r4 = r2
8: (0f) r4 += r3
; if (iph->protocol != IPPROTO_ICMP)
9: (71) r3 = *(u8 *)(r4 +14)
; if (iph->protocol != IPPROTO_ICMP)
10: (55) if r3 != 0x1 goto pc+5
11: (1f) r1 -= r2
12: (b7) r6 = 2
; subprog(len, ret);
13: (bf) r2 = r6
14: (85) call pc+2#bpf_prog_6a2f766e16102c10_subprog
15: (bf) r0 = r6
; }
16: (95) exit
void subprog(int len, int ret):
; subprog(int len, int ret)
17: (63) *(u32 *)(r10 -8) = r2
18: (63) *(u32 *)(r10 -4) = r1
; __sink(len);
19: (63) *(u32 *)(r10 -4) = r1
; __sink(ret);
20: (63) *(u32 *)(r10 -8) = r2
; }
21: (95) exit
bpftool p d j n xdp_fn
int xdp_fn(struct xdp_md * ctx):
bpf_prog_6480db4581c3a618_xdp_fn:
; int xdp_fn(struct xdp_md *ctx)
0: nopl (%rax,%rax)
5: nop
7: pushq %rbp
8: movq %rsp, %rbp
b: pushq %rbx
c: movl $2, %eax
; struct ethhdr *eth = (struct ethhdr *)(ctx_ptr(ctx, data));
11: movq (%rdi), %rsi
; int len = ctx->data_end - ctx->data;
15: movq 8(%rdi), %rdi
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
19: movq %rsi, %rdx
1c: addq $34, %rdx
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
20: cmpq %rdi, %rdx
23: ja 0xffffffffc0000926
25: movl $9, %edx
; struct iphdr *iph = (struct iphdr *)(eth + 1);
2a: movq %rsi, %rcx
2d: addq %rdx, %rcx
; if (iph->protocol != IPPROTO_ICMP)
30: movzbq 14(%rcx), %rdx
; if (iph->protocol != IPPROTO_ICMP)
35: cmpq $1, %rdx
39: jne 0xffffffffc0000926
3b: subq %rsi, %rdi
3e: movl $2, %ebx
; subprog(len, ret);
43: movq %rbx, %rsi
46: callq 0xffffffffc00009d0
4b: movq %rbx, %rax
; }
4e: popq %rbx
4f: leave
50: retq
51: int3
void subprog(int len, int ret):
bpf_prog_6a2f766e16102c10_subprog:
; subprog(int len, int ret)
0: nopl (%rax,%rax)
5: nop
7: pushq %rbp
8: movq %rsp, %rbp
b: subq $8, %rsp
12: movl %esi, -8(%rbp)
15: movl %edi, -4(%rbp)
; __sink(len);
18: movl %edi, -4(%rbp)
; __sink(ret);
1b: movl %esi, -8(%rbp)
; }
1e: leave
1f: retq
20: int3
===
with the elimination:
bpftool p d x n xdp_fn
int xdp_fn(struct xdp_md * ctx):
; int xdp_fn(struct xdp_md *ctx)
0: (b7) r0 = 2
; struct ethhdr *eth = (struct ethhdr *)(ctx_ptr(ctx, data));
1: (79) r2 = *(u64 *)(r1 +0)
; int len = ctx->data_end - ctx->data;
2: (79) r1 = *(u64 *)(r1 +8)
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
3: (bf) r3 = r2
4: (07) r3 += 34
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
5: (2d) if r3 > r1 goto pc+9
6: (b7) r3 = 9
; struct iphdr *iph = (struct iphdr *)(eth + 1);
7: (bf) r4 = r2
8: (0f) r4 += r3
; if (iph->protocol != IPPROTO_ICMP)
9: (71) r3 = *(u8 *)(r4 +14)
; if (iph->protocol != IPPROTO_ICMP)
10: (55) if r3 != 0x1 goto pc+4
11: (1f) r1 -= r2
12: (b7) r6 = 2
; subprog(len, ret);
13: (bf) r2 = r6
14: (bf) r0 = r6
; }
15: (95) exit
bpftool p d j n xdp_fn
int xdp_fn(struct xdp_md * ctx):
bpf_prog_861d0ecc72ad8d9e_xdp_fn:
; int xdp_fn(struct xdp_md *ctx)
0: nopl (%rax,%rax)
5: nop
7: pushq %rbp
8: movq %rsp, %rbp
b: pushq %rbx
c: movl $2, %eax
; struct ethhdr *eth = (struct ethhdr *)(ctx_ptr(ctx, data));
11: movq (%rdi), %rsi
; int len = ctx->data_end - ctx->data;
15: movq 8(%rdi), %rdi
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
19: movq %rsi, %rdx
1c: addq $34, %rdx
; if ((void *)(iph + 1) > ctx_ptr(ctx, data_end))
20: cmpq %rdi, %rdx
23: ja 0xffffffffc0000929
25: movl $9, %edx
; struct iphdr *iph = (struct iphdr *)(eth + 1);
2a: movq %rsi, %rcx
2d: addq %rdx, %rcx
; if (iph->protocol != IPPROTO_ICMP)
30: movzbq 14(%rcx), %rdx
; if (iph->protocol != IPPROTO_ICMP)
35: cmpq $1, %rdx
39: jne 0xffffffffc0000929
3b: subq %rsi, %rdi
3e: movl $2, %ebx
; subprog(len, ret);
43: movq %rbx, %rsi
46: movq %rbx, %rax
; }
49: popq %rbx
4a: leave
4b: retq
4c: int3
Demo source code:
https://github.com/Asphaltt/learn-by-example/tree/main/ebpf/eliminate-subprog
With the elimination, the callsite of 'subprog' has been eliminated.
However, as you mentioned above, the argument preparation insns are kept.
Thanks,
Leon