Re: [PATCH v5 net-next] net/tcp: trace all TCP/IP state transition with tcp_set_state tracepoint
From: Marcelo Ricardo Leitner
Date: Thu Dec 07 2017 - 15:03:00 EST
On Thu, Dec 07, 2017 at 02:10:42PM +0000, Yafang Shao wrote:
> The TCP/IP transition from TCP_LISTEN to TCP_SYN_RECV and some other
> transitions are not traced with tcp_set_state tracepoint.
>
> In order to trace the whole tcp lifespans, two helpers are introduced,
> void sk_set_state(struct sock *sk, int state);
> void sk_state_store(struct sock *sk, int newstate);
>
> When do TCP/IP state transition, we should use these two helpers or use
> tcp_set_state() other than assigning a value to sk_state directly.
>
> Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
> Acked-by: Song Liu <songliubraving@xxxxxx>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@xxxxxxxxx>
> Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
> ---
> v4->v5: Trace only TCP sockets, whatever it is stream socket or raw socket.
> v3->v4: Do not trace DCCP socket
> v2->v3: Per suggestion from Marcelo Ricardo Leitner, inverting __
> to sk_state_store.
> ---
> include/net/sock.h | 8 ++++++--
> net/core/sock.c | 15 +++++++++++++++
> net/ipv4/inet_connection_sock.c | 5 +++--
> net/ipv4/inet_hashtables.c | 2 +-
> net/ipv4/tcp.c | 2 +-
> 5 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 79e1a2c..1cf7685 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2349,18 +2349,22 @@ static inline int sk_state_load(const struct sock *sk)
> }
>
> /**
> - * sk_state_store - update sk->sk_state
> + * __sk_state_store - update sk->sk_state
> * @sk: socket pointer
> * @newstate: new state
> *
> * Paired with sk_state_load(). Should be used in contexts where
> * state change might impact lockless readers.
> */
> -static inline void sk_state_store(struct sock *sk, int newstate)
> +static inline void __sk_state_store(struct sock *sk, int newstate)
> {
> smp_store_release(&sk->sk_state, newstate);
> }
>
> +/* For tcp_set_state tracepoint */
> +void sk_state_store(struct sock *sk, int newstate);
> +void sk_set_state(struct sock *sk, int state);
> +
> void sock_enable_timestamp(struct sock *sk, int flag);
> int sock_get_timestamp(struct sock *, struct timeval __user *);
> int sock_get_timestampns(struct sock *, struct timespec __user *);
> diff --git a/net/core/sock.c b/net/core/sock.c
> index c0b5b2f..61841a2 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -138,6 +138,7 @@
> #include <net/sock_reuseport.h>
>
> #include <trace/events/sock.h>
> +#include <trace/events/tcp.h>
>
> #include <net/tcp.h>
> #include <net/busy_poll.h>
> @@ -2859,6 +2860,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
> }
> EXPORT_SYMBOL(sock_get_timestampns);
>
> +void sk_state_store(struct sock *sk, int newstate)
> +{
> + if (sk->sk_protocol == IPPROTO_TCP)
> + trace_tcp_set_state(sk, sk->sk_state, newstate);
I think this is going in the wrong way. When Dave said to not define a
sock generic function in tcp code on v3, you moved it all from tcp.h
to sock.h. But now sock.h gets to deal with more tcp code, which also
isn't nice.
Instead, if you move it back to tcp.h but rename the function to
tcp_state_store (instead of the original sk_state_store), it won't be
a generic sock code and will fit nicely into tcp.h.
You may then even keep sk_state_store() as it is now, and just define
tcp_state_store():
tcp_state_store()
{
trace_tcp_...();
sk_state_store();
}
Making it very clear that this code is only to be used by tcp stack.
> + __sk_state_store(sk, newstate);
> +}
> +
> +void sk_set_state(struct sock *sk, int state)
Same about this one.
Dave?
> +{
> + if (sk->sk_protocol == IPPROTO_TCP)
> + trace_tcp_set_state(sk, sk->sk_state, state);
> + sk->sk_state = state;
> +}
> +
> void sock_enable_timestamp(struct sock *sk, int flag)
> {
> if (!sock_flag(sk, flag)) {
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 4ca46dc..41f9c87 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
> if (newsk) {
> struct inet_connection_sock *newicsk = inet_csk(newsk);
>
> - newsk->sk_state = TCP_SYN_RECV;
> + sk_set_state(newsk, TCP_SYN_RECV);
> newicsk->icsk_bind_hash = NULL;
>
> inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
> @@ -888,7 +888,8 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
> return 0;
> }
>
> - sk->sk_state = TCP_CLOSE;
> + sk_set_state(sk, TCP_CLOSE);
> +
> return err;
> }
> EXPORT_SYMBOL_GPL(inet_csk_listen_start);
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index f6f5810..5973693 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
> sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
> } else {
> percpu_counter_inc(sk->sk_prot->orphan_count);
> - sk->sk_state = TCP_CLOSE;
> + sk_set_state(sk, TCP_CLOSE);
> sock_set_flag(sk, SOCK_DEAD);
> inet_csk_destroy_sock(sk);
> }
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 1803116..ac98dc6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2065,7 +2065,7 @@ void tcp_set_state(struct sock *sk, int state)
> /* Change state AFTER socket is unhashed to avoid closed
> * socket sitting in hash tables.
> */
> - sk_state_store(sk, state);
> + __sk_state_store(sk, state);
>
> #ifdef STATE_TRACE
> SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
> --
> 1.8.3.1
>