Re: [PATCH v4 bpf-next 06/11] tcp: Migrate TCP_NEW_SYN_RECV requests at retransmitting SYN+ACKs.

From: Martin KaFai Lau
Date: Wed May 05 2021 - 00:56:47 EST


On Tue, Apr 27, 2021 at 12:46:18PM +0900, Kuniyuki Iwashima wrote:
[ ... ]

> diff --git a/net/core/request_sock.c b/net/core/request_sock.c
> index 82cf9fbe2668..08c37ecd923b 100644
> --- a/net/core/request_sock.c
> +++ b/net/core/request_sock.c
> @@ -151,6 +151,7 @@ struct request_sock *reqsk_clone(struct request_sock *req, struct sock *sk)
> memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
> req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
>
> + sk_node_init(&nreq_sk->sk_node);
This belongs to patch 5.
"rsk_refcnt" also needs to be set to 0 instead of staying uninitialized
after reqsk_clone() returns.

> nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
> #ifdef CONFIG_XPS
> nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 851992405826..dc984d1f352e 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -695,10 +695,20 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
> }
> EXPORT_SYMBOL(inet_rtx_syn_ack);
>
> +static void reqsk_queue_migrated(struct request_sock_queue *queue,
> + const struct request_sock *req)
> +{
> + if (req->num_timeout == 0)
> + atomic_inc(&queue->young);
> + atomic_inc(&queue->qlen);
> +}
> +
> static void reqsk_migrate_reset(struct request_sock *req)
> {
> + req->saved_syn = NULL;
> + inet_rsk(req)->ireq_opt = NULL;
> #if IS_ENABLED(CONFIG_IPV6)
> - inet_rsk(req)->ipv6_opt = NULL;
> + inet_rsk(req)->pktopts = NULL;
> #endif
> }
>
> @@ -741,16 +751,37 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
>
> static void reqsk_timer_handler(struct timer_list *t)
> {
> - struct request_sock *req = from_timer(req, t, rsk_timer);
> - struct sock *sk_listener = req->rsk_listener;
> - struct net *net = sock_net(sk_listener);
> - struct inet_connection_sock *icsk = inet_csk(sk_listener);
> - struct request_sock_queue *queue = &icsk->icsk_accept_queue;
> + struct request_sock *req = from_timer(req, t, rsk_timer), *nreq = NULL, *oreq = req;
nit: This line is too long.
Let's move the new "*nreq" and "*oreq" to a new line and keep the current
"*req" line as is:
struct request_sock *req = from_timer(req, t, rsk_timer);
struct request_sock *oreq = req, *nreq = NULL;

> + struct sock *sk_listener = req->rsk_listener, *nsk = NULL;
"*nsk" can be moved into the following "!= TCP_LISTEN" case below.
Keep the current "*sk_listener" line as is.

> + struct inet_connection_sock *icsk;
> + struct request_sock_queue *queue;
> + struct net *net;
> int max_syn_ack_retries, qlen, expire = 0, resend = 0;
>
> - if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
> - goto drop;
> + if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {

struct sock *nsk;

> + nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
> + if (!nsk)
> + goto drop;
> +
> + nreq = reqsk_clone(req, nsk);
> + if (!nreq)
> + goto drop;
> +
> + /* The new timer for the cloned req can decrease the 2
> + * by calling inet_csk_reqsk_queue_drop_and_put(), so
> + * hold another count to prevent use-after-free and
> + * call reqsk_put() just before return.
> + */
> + refcount_set(&nreq->rsk_refcnt, 2 + 1);
> + timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
> + reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
> +
> + req = nreq;
> + sk_listener = nsk;
> + }