Re: [PATCH net v2] tcp: avoid creating multiple req socks with the same tuples

From: maowenan
Date: Fri Jun 14 2019 - 00:24:38 EST


>>> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
>>> index 13ec7c3a9c49..fd45ed2fd985 100644
>>> --- a/net/ipv4/inet_connection_sock.c
>>> +++ b/net/ipv4/inet_connection_sock.c
>>> @@ -749,7 +749,7 @@ static void reqsk_timer_handler(struct timer_list *t)
>>> inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
>>> }
>>>
>>> -static void reqsk_queue_hash_req(struct request_sock *req,
>>> +static bool reqsk_queue_hash_req(struct request_sock *req,
>>> unsigned long timeout)
>>> {
>>> req->num_retrans = 0;
>>> @@ -759,19 +759,27 @@ static void reqsk_queue_hash_req(struct request_sock *req,
>>> timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
>>> mod_timer(&req->rsk_timer, jiffies + timeout);
>>>
>>> - inet_ehash_insert(req_to_sk(req), NULL);
>>> + if (!inet_ehash_insert(req_to_sk(req), NULL)) {
>>> + if (timer_pending(&req->rsk_timer))
>>> + del_timer_sync(&req->rsk_timer);
>>> + return false;
>>> + }
>>> /* before letting lookups find us, make sure all req fields
>>> * are committed to memory and refcnt initialized.
>>> */
>>> smp_wmb();
>>> refcount_set(&req->rsk_refcnt, 2 + 1);
>>> + return true;
>>> }
>>>
>>> -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
>>> +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
>>> unsigned long timeout)
>>> {
>>> - reqsk_queue_hash_req(req, timeout);
>>> + if (!reqsk_queue_hash_req(req, timeout))
>>> + return false;
>>> +
>>> inet_csk_reqsk_queue_added(sk);
>>> + return true;
>>> }
>>> EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
>>>
>>> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
>>> index c4503073248b..b6a1b5334565 100644
>>> --- a/net/ipv4/inet_hashtables.c
>>> +++ b/net/ipv4/inet_hashtables.c
>>> @@ -477,6 +477,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
>>> struct inet_ehash_bucket *head;
>>> spinlock_t *lock;
>>> bool ret = true;
>>> + struct sock *reqsk = NULL;
>>>
>>> WARN_ON_ONCE(!sk_unhashed(sk));
>>>
>>> @@ -486,6 +487,18 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
>>> lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
>>>
>>> spin_lock(lock);
>>> + if (!osk)
>>> + reqsk = __inet_lookup_established(sock_net(sk), &tcp_hashinfo,
>>> + sk->sk_daddr, sk->sk_dport,
>>> + sk->sk_rcv_saddr, sk->sk_num,
>>> + sk->sk_bound_dev_if, sk->sk_bound_dev_if);
>>> + if (unlikely(reqsk)) {
>>
>> Why would this reqsk be a SYN_RECV socket, and not an ESTABLISHED one (or a
>> TIME_WAIT one)?
>
> It wouldn't be SYN_RECV, ESTABLISHED, or TIME_WAIT, just TCP_NEW_SYN_RECV.
>
> When the server receives the third handshake packet (the ACK), the SYN_RECV sk is inserted into the hash with osk != NULL.
> The lookup here only avoids creating two or more request socks in TCP_NEW_SYN_RECV state when SYN packets are received.
>
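(For reference, the osk != NULL insert on the third ACK comes from
tcp_v4_syn_recv_sock(), where the child socket takes over the request
sock's slot in the ehash; roughly, as I read the current code:)

	/* tcp_v4_syn_recv_sock(): the freshly created child replaces the
	 * request sock in the ehash, so inet_ehash_insert() is reached
	 * with osk != NULL on this path.
	 */
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));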


@Eric, for this issue I only want to check the TCP_NEW_SYN_RECV sk; is something like the below OK?
+ if (!osk && sk->sk_state == TCP_NEW_SYN_RECV)
+ reqsk = __inet_lookup_established(sock_net(sk), &tcp_hashinfo,
+ sk->sk_daddr, sk->sk_dport,
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_bound_dev_if, sk->sk_bound_dev_if);
+ if (unlikely(reqsk)) {
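and the branch body would stay much as in the patch, though I think the
unlock should come before the free, and the reference taken by
__inet_lookup_established() needs to be dropped; a rough sketch (the
sock_put() is my assumption, since the lookup returns a referenced
socket):

+		spin_unlock(lock);
+		sock_put(reqsk);	/* release the lookup's reference */
+		reqsk_free(inet_reqsk(sk));	/* drop the duplicate req sk */
+		return false;
+	}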




>>
>>> + ret = false;
>>> + reqsk_free(inet_reqsk(sk));
>>> + spin_unlock(lock);
>>> + return ret;
>>> + }
>>> +
>>> if (osk) {
>>
>> This test should have been a hint here: sometimes we _expect_ to have an
>> old socket (TIMEWAIT) and remove it
> I will check the TIMEWAIT sk.
>>
>>
>>> WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
>>> ret = sk_nulls_del_node_init_rcu(osk);
>>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>>> index 38dfc308c0fb..358272394590 100644
>>> --- a/net/ipv4/tcp_input.c
>>> +++ b/net/ipv4/tcp_input.c
>>> @@ -6570,9 +6570,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>>> sock_put(fastopen_sk);
>>> } else {
>>> tcp_rsk(req)->tfo_listener = false;
>>> - if (!want_cookie)
>>> - inet_csk_reqsk_queue_hash_add(sk, req,
>>> - tcp_timeout_init((struct sock *)req));
>>> + if (!want_cookie && !inet_csk_reqsk_queue_hash_add(sk, req,
>>> + tcp_timeout_init((struct sock *)req)))
>>> + return 0;
>>> +
>>> af_ops->send_synack(sk, dst, &fl, req, &foc,
>>> !want_cookie ? TCP_SYNACK_NORMAL :
>>> TCP_SYNACK_COOKIE);
>>> --
>>> 2.20.1
>>>
>>
>> I believe the proper fix is more complicated.
> yes, pretty complicated.
>>
>> Probably we need to move the locking to a less deep location.


Currently, I find inet_ehash_insert() is the most suitable location to do the hash lookup,
because the bucket lock can be derived from sk_hash, and the spin_lock code already exists there.
In v1 I put the hash lookup in tcp_conn_request(), where a redundant lock had to be taken just to do the lookup.
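Concretely, inet_ehash_insert() already derives the bucket lock from
sk_hash, so the duplicate-tuple lookup can ride on the same critical
section; roughly (paraphrasing the existing code):

	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock(lock);
	/* duplicate-tuple lookup + hash insert under one bucket lock */
	spin_unlock(lock);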


>
>>
>> (Also a similar fix would be needed in IPv6)
> ok

I find IPv6 goes through the same call path, so this fix should cover IPv6 as well:
tcp_v6_conn_request
  tcp_conn_request
    inet_csk_reqsk_queue_hash_add
      reqsk_queue_hash_req
        inet_ehash_insert
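One thing I am not sure about: for an IPv6 request sock, the lookup in
inet_ehash_insert() would presumably need the v6 variant, something like
the below (untested sketch; passing sk_bound_dev_if for both dif and
sdif just mirrors the v4 call in the patch):

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		reqsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
						   &sk->sk_v6_daddr, sk->sk_dport,
						   &sk->sk_v6_rcv_saddr, sk->sk_num,
						   sk->sk_bound_dev_if,
						   sk->sk_bound_dev_if);
	else
#endif
		reqsk = __inet_lookup_established(sock_net(sk), &tcp_hashinfo,
						  sk->sk_daddr, sk->sk_dport,
						  sk->sk_rcv_saddr, sk->sk_num,
						  sk->sk_bound_dev_if,
						  sk->sk_bound_dev_if);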



>>
>> Thanks.