[PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint

From: Yafang Shao
Date: Tue Dec 19 2017 - 22:13:36 EST


As sk_state is a common field of struct sock, the state
transition tracepoint should not be a TCP-specific feature.
Currently it traces all AF_INET state transitions, so I rename this
tracepoint to inet_sock_set_state, with some minor changes, and move it
into trace/events/sock.h.
We don't need to create a file named trace/events/inet_sock.h for this single
tracepoint.

Two helpers are introduced to trace sk_state transitions:
- void inet_sk_state_store(struct sock *sk, int newstate);
- void inet_sk_set_state(struct sock *sk, int state);
As the trace header should not be included in other header files,
they are declared in include/net/inet_sock.h and defined in net/ipv4/af_inet.c.

Protocols such as SCTP may be compiled as a module (ko), hence export
inet_sk_set_state().

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
---
include/net/inet_sock.h | 2 +
include/trace/events/sock.h | 107 ++++++++++++++++++++++++++++++++++++++++
include/trace/events/tcp.h | 31 ------------
net/ipv4/af_inet.c | 14 ++++++
net/ipv4/inet_connection_sock.c | 6 +--
net/ipv4/inet_hashtables.c | 2 +-
net/ipv4/tcp.c | 6 +--
7 files changed, 128 insertions(+), 40 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 39efb96..a3431a4 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -290,6 +290,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
#endif

int inet_sk_rebuild_header(struct sock *sk);
+void inet_sk_set_state(struct sock *sk, int state);
+void inet_sk_state_store(struct sock *sk, int newstate);

static inline unsigned int __inet_ehashfn(const __be32 laddr,
const __u16 lport,
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index ec4dade..3b9094a 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -6,7 +6,50 @@
#define _TRACE_SOCK_H

#include <net/sock.h>
+#include <net/ipv6.h>
#include <linux/tracepoint.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+/*
+ * The protocols traced by inet_sock_set_state. The list is written in terms
+ * of EM()/EMe() so it can be expanded twice below with different definitions.
+ */
+#define inet_protocol_names \
+ EM(IPPROTO_TCP) \
+ EM(IPPROTO_DCCP) \
+ EMe(IPPROTO_SCTP)
+
+/* State names used by show_tcp_state_name(); same EM()/EMe() scheme. */
+#define tcp_state_names \
+ EM(TCP_ESTABLISHED) \
+ EM(TCP_SYN_SENT) \
+ EM(TCP_SYN_RECV) \
+ EM(TCP_FIN_WAIT1) \
+ EM(TCP_FIN_WAIT2) \
+ EM(TCP_TIME_WAIT) \
+ EM(TCP_CLOSE) \
+ EM(TCP_CLOSE_WAIT) \
+ EM(TCP_LAST_ACK) \
+ EM(TCP_LISTEN) \
+ EM(TCP_CLOSING) \
+ EMe(TCP_NEW_SYN_RECV)
+
+/*
+ * enums need to be exported to user space: in this first expansion every
+ * entry of both lists becomes a TRACE_DEFINE_ENUM() statement.
+ */
+#undef EM
+#undef EMe
+#define EM(a) TRACE_DEFINE_ENUM(a);
+#define EMe(a) TRACE_DEFINE_ENUM(a);
+
+inet_protocol_names
+tcp_state_names
+
+/*
+ * Second expansion: every entry becomes a { value, "name" } pair for
+ * __print_symbolic(). EMe() is the last entry of a list and therefore
+ * omits the trailing comma.
+ */
+#undef EM
+#undef EMe
+#define EM(a) { a, #a },
+#define EMe(a) { a, #a }
+
+/* Print a protocol number using the names in inet_protocol_names. */
+#define show_inet_protocol_name(val) \
+ __print_symbolic(val, inet_protocol_names)
+
+/* Print a socket state using the names in tcp_state_names. */
+#define show_tcp_state_name(val) \
+ __print_symbolic(val, tcp_state_names)

TRACE_EVENT(sock_rcvqueue_full,

@@ -63,6 +106,70 @@
__entry->rmem_alloc)
);

+/*
+ * inet_sock_set_state - trace a change of sk_state on an inet socket
+ * @sk: socket whose state is changing
+ * @oldstate: state before the change
+ * @newstate: state after the change
+ *
+ * Records the socket pointer, both states, sk_protocol, the source and
+ * destination ports (converted with ntohs()) and the source/destination
+ * addresses in both IPv4 and IPv6 form.
+ */
+TRACE_EVENT(inet_sock_set_state,
+
+ TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
+
+ TP_ARGS(sk, oldstate, newstate),
+
+ TP_STRUCT__entry(
+ __field(const void *, skaddr)
+ __field(int, oldstate)
+ __field(int, newstate)
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __field(__u8, protocol)
+ __array(__u8, saddr, 4)
+ __array(__u8, daddr, 4)
+ __array(__u8, saddr_v6, 16)
+ __array(__u8, daddr_v6, 16)
+ ),
+
+ TP_fast_assign(
+ struct inet_sock *inet = inet_sk(sk);
+ struct in6_addr *pin6;
+ __be32 *p32;
+
+ __entry->skaddr = sk;
+ __entry->oldstate = oldstate;
+ __entry->newstate = newstate;
+
+ __entry->protocol = sk->sk_protocol;
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+
+ /* The IPv4 addresses are copied as-is into the 4-byte arrays. */
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = inet->inet_saddr;
+
+ p32 = (__be32 *) __entry->daddr;
+ *p32 = inet->inet_daddr;
+
+ /*
+ * AF_INET6 sockets (when IPv6 is enabled) report their own v6
+ * addresses; every other socket gets its IPv4 addresses filled in
+ * through ipv6_addr_set_v4mapped().
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ pin6 = (struct in6_addr *)__entry->saddr_v6;
+ *pin6 = sk->sk_v6_rcv_saddr;
+ pin6 = (struct in6_addr *)__entry->daddr_v6;
+ *pin6 = sk->sk_v6_daddr;
+ } else
+#endif
+ {
+ pin6 = (struct in6_addr *)__entry->saddr_v6;
+ ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+ pin6 = (struct in6_addr *)__entry->daddr_v6;
+ ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+ }
+ ),
+
+ /*
+ * The two string literals are concatenated by the compiler, so the
+ * first one must end with a space to keep daddr= and saddrv6= apart.
+ */
+ TP_printk("protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 "
+ "saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
+ show_inet_protocol_name(__entry->protocol),
+ __entry->sport, __entry->dport,
+ __entry->saddr, __entry->daddr,
+ __entry->saddr_v6, __entry->daddr_v6,
+ show_tcp_state_name(__entry->oldstate),
+ show_tcp_state_name(__entry->newstate))
+);
+
#endif /* _TRACE_SOCK_H */

/* This part must be outside protection */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index ec52fb3..8e88a16 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,37 +9,6 @@
#include <linux/tracepoint.h>
#include <net/ipv6.h>

-#define tcp_state_names \
- EM(TCP_ESTABLISHED) \
- EM(TCP_SYN_SENT) \
- EM(TCP_SYN_RECV) \
- EM(TCP_FIN_WAIT1) \
- EM(TCP_FIN_WAIT2) \
- EM(TCP_TIME_WAIT) \
- EM(TCP_CLOSE) \
- EM(TCP_CLOSE_WAIT) \
- EM(TCP_LAST_ACK) \
- EM(TCP_LISTEN) \
- EM(TCP_CLOSING) \
- EMe(TCP_NEW_SYN_RECV) \
-
-/* enums need to be exported to user space */
-#undef EM
-#undef EMe
-#define EM(a) TRACE_DEFINE_ENUM(a);
-#define EMe(a) TRACE_DEFINE_ENUM(a);
-
-tcp_state_names
-
-#undef EM
-#undef EMe
-#define EM(a) tcp_state_name(a),
-#define EMe(a) tcp_state_name(a)
-
-#define tcp_state_name(state) { state, #state }
-#define show_tcp_state_name(val) \
- __print_symbolic(val, tcp_state_names)
-
/*
* tcp event with arguments sk and skb
*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f00499a..bab98a4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -121,6 +121,7 @@
#endif
#include <net/l3mdev.h>

+#include <trace/events/sock.h>

/* The inetsw table contains everything that inet_create needs to
* build a new socket.
@@ -1220,6 +1221,19 @@ int inet_sk_rebuild_header(struct sock *sk)
}
EXPORT_SYMBOL(inet_sk_rebuild_header);

+/*
+ * inet_sk_set_state - change sk->sk_state and trace the transition
+ * @sk: socket whose state is changed
+ * @state: new state
+ *
+ * Fires the inet_sock_set_state tracepoint with the current sk->sk_state as
+ * the old state and @state as the new one, then assigns @state to
+ * sk->sk_state with a plain store.
+ */
+void inet_sk_set_state(struct sock *sk, int state)
+{
+ /* Trace first: sk->sk_state still holds the old state here. */
+ trace_inet_sock_set_state(sk, sk->sk_state, state);
+ sk->sk_state = state;
+}
+EXPORT_SYMBOL(inet_sk_set_state);
+
+/*
+ * inet_sk_state_store - change sk->sk_state with release semantics and
+ * trace the transition
+ * @sk: socket whose state is changed
+ * @newstate: new state
+ *
+ * Fires the inet_sock_set_state tracepoint with the current sk->sk_state as
+ * the old state and @newstate as the new one, then writes @newstate to
+ * sk->sk_state using smp_store_release().
+ */
+void inet_sk_state_store(struct sock *sk, int newstate)
+{
+ /* Trace first: sk->sk_state still holds the old state here. */
+ trace_inet_sock_set_state(sk, sk->sk_state, newstate);
+ smp_store_release(&sk->sk_state, newstate);
+}
+
struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc..f460fc0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);

- newsk->sk_state = TCP_SYN_RECV;
+ inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;

inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -877,7 +877,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
* It is OK, because this socket enters to hash table only
* after validation is complete.
*/
- sk_state_store(sk, TCP_LISTEN);
+ inet_sk_state_store(sk, TCP_LISTEN);
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
inet->inet_sport = htons(inet->inet_num);

@@ -888,7 +888,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
return 0;
}

- sk->sk_state = TCP_CLOSE;
+ inet_sk_set_state(sk, TCP_CLOSE);
return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f5810..37b7da0 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
} else {
percpu_counter_inc(sk->sk_prot->orphan_count);
- sk->sk_state = TCP_CLOSE;
+ inet_sk_set_state(sk, TCP_CLOSE);
sock_set_flag(sk, SOCK_DEAD);
inet_csk_destroy_sock(sk);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c470fec..d408fb4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,8 +283,6 @@
#include <asm/ioctls.h>
#include <net/busy_poll.h>

-#include <trace/events/tcp.h>
-
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);

@@ -2040,8 +2038,6 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;

- trace_tcp_set_state(sk, oldstate, state);
-
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
@@ -2065,7 +2061,7 @@ void tcp_set_state(struct sock *sk, int state)
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
- sk_state_store(sk, state);
+ inet_sk_state_store(sk, state);

#ifdef STATE_TRACE
SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
--
1.8.3.1