Re: [PATCH] net: tcp_drop adds `reason` parameter for tracing

From: Eric Dumazet
Date: Wed Aug 25 2021 - 11:39:56 EST


On Tue, Aug 24, 2021 at 5:08 PM Zhongya Yan <yan2228598786@xxxxxxxxx> wrote:
>
> Cool, thanks!
> i will do it

Since these drops are hardly hot path, why not simply use a string ?
An ENUM will not really help grep games.

tcp_drop(sk, skb, "csum error");

The const char * argument would not be used unless tracepoint is
active, but who cares.

(Speaking of csum errors, they are not currently calling tcp_drop(),
but we could unify all packet drops to use this tracepoint,
and get rid of adhoc ones, like trace_tcp_bad_csum()

>
> Steven Rostedt <rostedt@xxxxxxxxxxx> 于2021年8月24日周二 下午11:30写道:
>>
>> On Tue, 24 Aug 2021 05:51:40 -0700
>> Zhongya Yan <yan2228598786@xxxxxxxxx> wrote:
>>
>> > When using `tcp_drop(struct sock *sk, struct sk_buff *skb)` we can
>> > not tell why we need to delete `skb`. To solve this problem I updated the
>> > method `tcp_drop(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason)`
>> > to include the source of the deletion when it is done, so you can
>> > get an idea of the reason for the deletion based on the source.
>> >
>> > The current purpose is mainly derived from the suggestions
>> > of `Yonghong Song` and `brendangregg`:
>> >
>> > https://github.com/iovisor/bcc/issues/3533.
>> >
>> > "It is worthwhile to mention the context/why we want to this
>> > tracepoint with bcc issue https://github.com/iovisor/bcc/issues/3533.
>> > Mainly two reasons: (1). tcp_drop is a tiny function which
>> > may easily get inlined, a tracepoint is more stable, and (2).
>> > tcp_drop does not provide enough information on why it is dropped.
>> > " by Yonghong Song
>> >
>> > Signed-off-by: Zhongya Yan <yan2228598786@xxxxxxxxx>
>> > ---
>> > include/net/tcp.h | 11 ++++++++
>> > include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++
>> > net/ipv4/tcp_input.c | 34 +++++++++++++++--------
>> > 3 files changed, 89 insertions(+), 12 deletions(-)
>> >
>> > diff --git a/include/net/tcp.h b/include/net/tcp.h
>> > index 784d5c3ef1c5..dd8cd8d6f2f1 100644
>> > --- a/include/net/tcp.h
>> > +++ b/include/net/tcp.h
>> > @@ -254,6 +254,17 @@ extern atomic_long_t tcp_memory_allocated;
>> > extern struct percpu_counter tcp_sockets_allocated;
>> > extern unsigned long tcp_memory_pressure;
>> >
>> > +enum tcp_drop_reason {
>> > + TCP_OFO_QUEUE = 1,
>> > + TCP_DATA_QUEUE_OFO = 2,
>> > + TCP_DATA_QUEUE = 3,
>> > + TCP_PRUNE_OFO_QUEUE = 4,
>> > + TCP_VALIDATE_INCOMING = 5,
>> > + TCP_RCV_ESTABLISHED = 6,
>> > + TCP_RCV_SYNSENT_STATE_PROCESS = 7,
>> > + TCP_RCV_STATE_PROCESS = 8
>>
>> As enums increase by one, you could save yourself the burden of
>> updating the numbers and just have:
>>
>> enum tcp_drop_reason {
>> TCP_OFO_QUEUE = 1,
>> TCP_DATA_QUEUE_OFO,
>> TCP_DATA_QUEUE,
>> TCP_PRUNE_OFO_QUEUE,
>> TCP_VALIDATE_INCOMING,
>> TCP_RCV_ESTABLISHED,
>> TCP_RCV_SYNSENT_STATE_PROCESS,
>> TCP_RCV_STATE_PROCESS
>> };
>>
>> Which would do the same.
>>
>>
>> > +};
>> > +
>> > /* optimized version of sk_under_memory_pressure() for TCP sockets */
>> > static inline bool tcp_under_memory_pressure(const struct sock *sk)
>> > {
>> > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
>> > index 521059d8dc0a..a0d3d31eb591 100644
>> > --- a/include/trace/events/tcp.h
>> > +++ b/include/trace/events/tcp.h
>> > @@ -371,6 +371,62 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
>> > TP_ARGS(skb)
>> > );
>> >
>>
>> If you would like to see the english translation of what these
>> "reasons" are and not have to remember which number is which, you can
>> do the following:
>>
>> #define TCP_DROP_REASON \
>> EM(TCP_OFO_QUEUE, ofo_queue) \
>> EM(TCP_DATA_QUEUE_OFO, data_queue_ofo) \
>> EM(TCP_DATA_QUEUE, data_queue) \
>> EM(TCP_PRUNE_OFO_QUEUE, prune_ofo_queue) \
>> EM(TCP_VALIDATE_INCOMING, validate_incoming) \
>> EM(TCP_RCV_ESTABLISHED, rcv_established) \
>> EM(TCP_RCV_SYNSENT_STATE_PROCESS, rcv_synsent_state_process) \
>> EMe(TCP_RCV_STATE_PROCESS, rcv_state_proces)
>>
>> #undef EM
>> #undef EMe
>>
>> #define EM(a,b) { a, #b },
>> #define EMe(a, b) { a, #b }
>>
>>
>> > +TRACE_EVENT(tcp_drop,
>> > + TP_PROTO(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason),
>> > +
>> > + TP_ARGS(sk, skb, reason),
>> > +
>> > + TP_STRUCT__entry(
>> > + __array(__u8, saddr, sizeof(struct sockaddr_in6))
>> > + __array(__u8, daddr, sizeof(struct sockaddr_in6))
>> > + __field(__u16, sport)
>> > + __field(__u16, dport)
>> > + __field(__u32, mark)
>> > + __field(__u16, data_len)
>> > + __field(__u32, snd_nxt)
>> > + __field(__u32, snd_una)
>> > + __field(__u32, snd_cwnd)
>> > + __field(__u32, ssthresh)
>> > + __field(__u32, snd_wnd)
>> > + __field(__u32, srtt)
>> > + __field(__u32, rcv_wnd)
>> > + __field(__u64, sock_cookie)
>> > + __field(__u32, reason)
>> > + ),
>> > +
>> > + TP_fast_assign(
>> > + const struct tcphdr *th = (const struct tcphdr *)skb->data;
>> > + const struct inet_sock *inet = inet_sk(sk);
>> > + const struct tcp_sock *tp = tcp_sk(sk);
>> > +
>> > + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
>> > + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
>> > +
>> > + TP_STORE_ADDR_PORTS(__entry, inet, sk);
>> > +
>> > + __entry->sport = ntohs(inet->inet_sport);
>> > + __entry->dport = ntohs(inet->inet_dport);
>> > + __entry->mark = skb->mark;
>> > +
>> > + __entry->data_len = skb->len - __tcp_hdrlen(th);
>> > + __entry->snd_nxt = tp->snd_nxt;
>> > + __entry->snd_una = tp->snd_una;
>> > + __entry->snd_cwnd = tp->snd_cwnd;
>> > + __entry->snd_wnd = tp->snd_wnd;
>> > + __entry->rcv_wnd = tp->rcv_wnd;
>> > + __entry->ssthresh = tcp_current_ssthresh(sk);
>> > + __entry->srtt = tp->srtt_us >> 3;
>> > + __entry->sock_cookie = sock_gen_cookie(sk);
>> > + __entry->reason = reason;
>> > + ),
>> > +
>> > + TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx reason=%d",
>>
>> Then above you can have: "reason=%s"
>>
>> > + __entry->saddr, __entry->daddr, __entry->mark,
>> > + __entry->data_len, __entry->snd_nxt, __entry->snd_una,
>> > + __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
>> > + __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->reason)
>>
>> And here:
>>
>> __print_symbolic(__entry->reason, TCP_DROP_REASON)
>>
>> -- Steve
>>
>>
>> > +);
>> > +
>> > #endif /* _TRACE_TCP_H */
>> >
>> > /* This part must be outside protection */
>> >