Re: [PATCH net-next] trace: tcp: Add tracepoint for tcp_sendmsg()

From: David Ahern
Date: Wed Feb 26 2025 - 12:17:44 EST


On 2/26/25 9:10 AM, Breno Leitao wrote:
>> Also, if a tracepoint is added, inside of tcp_sendmsg_locked would cover
>> more use cases (see kernel references to it).
>
> Agree, this seems to provide more useful information
>
>> We have a patch for a couple years now with a tracepoint inside the
>
> Sorry, where do you have this patch? is it downstream?

company tree. Attached. Where to put tracepoints and what arguments to
supply so that it is beneficial to multiple users is always a touchy
subject :-), so I have not tried to push the patch out. sock arg should
be added to it for example.

The key is to see how tcp_sendmsg_locked breaks up the buffers, and then
another one in tcp_write_xmit to see when the actual push out happens.
At the time I was looking at latency in the stack - from sendmsg call to
driver pushing descriptors to hardware.

From 2298ca66c15bae6a95698abd8d029b9271fbefa3 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@xxxxxxxxx>
Date: Fri, 24 Mar 2023 22:07:53 +0000
Subject: [PATCH] tcp: Add tracepoints to tcp_write_xmit and tcp_sendmsg_locked

Signed-off-by: David Ahern <dsahern@xxxxxxxxx>
---
include/trace/events/tcp.h | 57 ++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp.c | 4 +++
net/ipv4/tcp_output.c | 1 +
3 files changed, 62 insertions(+)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 901b440238d5..6b19e1d4d79d 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -187,6 +187,63 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
TP_ARGS(sk)
);

+/*
+ * Fires on each pass through the tcp_sendmsg_locked() copy loop, recording
+ * how the remaining user data relates to the current tail skb and size_goal
+ * (i.e. how sendmsg payload is being split across skbs).
+ */
+TRACE_EVENT(tcp_sendmsg_locked,
+
+	TP_PROTO(struct msghdr *msg, struct sk_buff *skb, int size_goal),
+
+	TP_ARGS(msg, skb, size_goal),
+
+	TP_STRUCT__entry(
+		/* store as a pointer and print with %p so the address is
+		 * hashed like other net tracepoints, rather than leaking
+		 * the raw kernel address via a %llx integer cast
+		 */
+		__field(const void *, skb)
+		__field(int, skb_len)
+		__field(int, msg_left)
+		__field(int, size_goal)
+	),
+
+	TP_fast_assign(
+		__entry->skb = skb;
+		/* skb may be NULL when the write queue tail is empty */
+		__entry->skb_len = skb ? skb->len : 0;
+		__entry->msg_left = msg_data_left(msg);
+		__entry->size_goal = size_goal;
+	),
+
+	TP_printk("skb %p len %d msg_left %d size_goal %d",
+		  __entry->skb, __entry->skb_len,
+		  __entry->msg_left, __entry->size_goal)
+);
+
+/*
+ * Fires once per tcp_write_xmit() invocation, capturing the segmentation
+ * and pacing parameters in effect before the transmit loop runs, so the
+ * push-out moment can be correlated with earlier tcp_sendmsg_locked events.
+ */
+TRACE_EVENT(tcp_write_xmit,
+
+	TP_PROTO(struct sock *sk, unsigned int mss_now, int nonagle, u32 max_segs),
+
+	TP_ARGS(sk, mss_now, nonagle, max_segs),
+
+	TP_STRUCT__entry(
+		__field(__u64, tcp_wstamp_ns)
+		__field(__u64, tcp_clock_cache)
+		__field(unsigned int, mss_now)
+		__field(unsigned int, max_segs)
+		__field(int, nonagle)
+		__field(int, sk_pacing)
+	),
+
+	TP_fast_assign(
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		__entry->mss_now = mss_now;
+		__entry->nonagle = nonagle;
+		__entry->max_segs = max_segs;
+		__entry->sk_pacing = tcp_needs_internal_pacing(sk);
+		__entry->tcp_wstamp_ns = tp->tcp_wstamp_ns;
+		__entry->tcp_clock_cache = tp->tcp_clock_cache;
+	),
+
+	/* %llu, not %lld: tcp_wstamp_ns/tcp_clock_cache are unsigned (__u64) */
+	TP_printk("mss %u segs %u nonagle %d sk_pacing %d tcp_wstamp_ns %llu tcp_clock_cache %llu",
+		  __entry->mss_now, __entry->max_segs,
+		  __entry->nonagle, __entry->sk_pacing,
+		  __entry->tcp_wstamp_ns, __entry->tcp_clock_cache)
+);
+
TRACE_EVENT(tcp_retransmit_synack,

TP_PROTO(const struct sock *sk, const struct request_sock *req),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6bb8eb803105..0fa2c8e035b7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <trace/events/tcp.h>

/* Track pending CMSGs. */
enum {
@@ -1312,6 +1313,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
if (skb)
copy = size_goal - skb->len;

+ trace_tcp_sendmsg_locked(msg, skb, size_goal);
+
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 74190518708a..c84f18cd9b7e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2623,6 +2623,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}

max_segs = tcp_tso_segs(sk, mss_now);
+ trace_tcp_write_xmit(sk, mss_now, nonagle, max_segs);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;

--
2.43.0