Re: [PATCH v4 bpf-next 06/11] tcp: Migrate TCP_NEW_SYN_RECV requests at retransmitting SYN+ACKs.
From: Kuniyuki Iwashima
Date: Wed May 05 2021 - 19:17:14 EST
From: Martin KaFai Lau <kafai@xxxxxx>
Date: Tue, 4 May 2021 21:56:18 -0700
> On Tue, Apr 27, 2021 at 12:46:18PM +0900, Kuniyuki Iwashima wrote:
> [ ... ]
>
> > diff --git a/net/core/request_sock.c b/net/core/request_sock.c
> > index 82cf9fbe2668..08c37ecd923b 100644
> > --- a/net/core/request_sock.c
> > +++ b/net/core/request_sock.c
> > @@ -151,6 +151,7 @@ struct request_sock *reqsk_clone(struct request_sock *req, struct sock *sk)
> > memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
> > req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
> >
> > + sk_node_init(&nreq_sk->sk_node);
> This belongs to patch 5.
> "rsk_refcnt" also needs to be 0 instead of staying uninitialized
> after reqsk_clone() returns.
I'll move this part to patch 5 and initialize rsk_refcnt to 0 in
reqsk_clone(), as reqsk_alloc() does.
>
> > nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
> > #ifdef CONFIG_XPS
> > nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
> > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> > index 851992405826..dc984d1f352e 100644
> > --- a/net/ipv4/inet_connection_sock.c
> > +++ b/net/ipv4/inet_connection_sock.c
> > @@ -695,10 +695,20 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
> > }
> > EXPORT_SYMBOL(inet_rtx_syn_ack);
> >
> > +static void reqsk_queue_migrated(struct request_sock_queue *queue,
> > + const struct request_sock *req)
> > +{
> > + if (req->num_timeout == 0)
> > + atomic_inc(&queue->young);
> > + atomic_inc(&queue->qlen);
> > +}
> > +
> > static void reqsk_migrate_reset(struct request_sock *req)
> > {
> > + req->saved_syn = NULL;
> > + inet_rsk(req)->ireq_opt = NULL;
> > #if IS_ENABLED(CONFIG_IPV6)
> > - inet_rsk(req)->ipv6_opt = NULL;
> > + inet_rsk(req)->pktopts = NULL;
> > #endif
> > }
> >
> > @@ -741,16 +751,37 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
> >
> > static void reqsk_timer_handler(struct timer_list *t)
> > {
> > - struct request_sock *req = from_timer(req, t, rsk_timer);
> > - struct sock *sk_listener = req->rsk_listener;
> > - struct net *net = sock_net(sk_listener);
> > - struct inet_connection_sock *icsk = inet_csk(sk_listener);
> > - struct request_sock_queue *queue = &icsk->icsk_accept_queue;
> > + struct request_sock *req = from_timer(req, t, rsk_timer), *nreq = NULL, *oreq = req;
> nit. This line is too long.
> Let's move the new "*nreq" and "*oreq" to a new line and keep the current
> "*req" line as is:
> struct request_sock *req = from_timer(req, t, rsk_timer);
> struct request_sock *oreq = req, *nreq = NULL;
I'll fix that.
>
> > + struct sock *sk_listener = req->rsk_listener, *nsk = NULL;
> "*nsk" can be moved into the following "!= TCP_LISTEN" case below.
> Keep the current "*sk_listener" line as is.
I'll move the nsk's definition.
Thank you.
>
> > + struct inet_connection_sock *icsk;
> > + struct request_sock_queue *queue;
> > + struct net *net;
> > int max_syn_ack_retries, qlen, expire = 0, resend = 0;
> >
> > - if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
> > - goto drop;
> > + if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
>
> struct sock *nsk;
>
> > + nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
> > + if (!nsk)
> > + goto drop;
> > +
> > + nreq = reqsk_clone(req, nsk);
> > + if (!nreq)
> > + goto drop;
> > +
> > + /* The new timer for the cloned req can decrease the 2
> > + * by calling inet_csk_reqsk_queue_drop_and_put(), so
> > + * hold another count to prevent use-after-free and
> > + * call reqsk_put() just before return.
> > + */
> > + refcount_set(&nreq->rsk_refcnt, 2 + 1);
> > + timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
> > + reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
> > +
> > + req = nreq;
> > + sk_listener = nsk;
> > + }