[PATCH net-next 3/5] mptcp: annotate lockless access for RX path fields

From: Matthieu Baerts (NGI0)
Date: Fri Feb 02 2024 - 06:43:28 EST


From: Paolo Abeni <pabeni@xxxxxxxxxx>

The following fields:

- ack_seq
- snd_una
- wnd_end
- rmem_fwd_alloc

are protected by the data lock end accessed lockless in a few
spots. Ensure ONCE annotation for write (under such lock) and for
lockless read.

Signed-off-by: Paolo Abeni <pabeni@xxxxxxxxxx>
Reviewed-by: Mat Martineau <martineau@xxxxxxxxxx>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@xxxxxxxxxx>
---
net/mptcp/options.c | 6 +++---
net/mptcp/protocol.c | 19 +++++++++++--------
2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9b31a0a06265..801a3525230d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1030,7 +1030,7 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
msk->bytes_acked += new_snd_una - msk->snd_una;
- msk->snd_una = new_snd_una;
+ WRITE_ONCE(msk->snd_una, new_snd_una);
}

static void ack_update_msk(struct mptcp_sock *msk,
@@ -1057,7 +1057,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;

if (after64(new_wnd_end, msk->wnd_end))
- msk->wnd_end = new_wnd_end;
+ WRITE_ONCE(msk->wnd_end, new_wnd_end);

/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
if (after64(msk->wnd_end, snd_nxt))
@@ -1071,7 +1071,7 @@ static void ack_update_msk(struct mptcp_sock *msk,

trace_ack_update_msk(mp_opt->data_ack,
old_snd_una, new_snd_una,
- new_wnd_end, msk->wnd_end);
+ new_wnd_end, READ_ONCE(msk->wnd_end));
}

bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b1c24ac3630f..90804e7fb7f6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -410,6 +410,7 @@ static void mptcp_close_wake_up(struct sock *sk)
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}

+/* called under the msk socket lock */
static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -441,16 +442,17 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
}
}

+/* can be called with no lock acquired */
static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
{
struct mptcp_sock *msk = mptcp_sk(sk);

if (READ_ONCE(msk->rcv_data_fin) &&
- ((1 << sk->sk_state) &
+ ((1 << inet_sk_state_load(sk)) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);

- if (msk->ack_seq == rcv_data_fin_seq) {
+ if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) {
if (seq)
*seq = rcv_data_fin_seq;

@@ -748,7 +750,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
- msk->ack_seq = end_seq;
+ WRITE_ONCE(msk->ack_seq, end_seq);
moved = true;
}
return moved;
@@ -985,6 +987,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}

+/* called under both the msk socket lock and the data lock */
static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2110,7 +2113,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)

skb = skb_peek(&msk->receive_queue);
if (skb) {
- u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;

if (hint_val >= INT_MAX)
return INT_MAX;
@@ -2754,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->rmem_fwd_alloc = 0;
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
@@ -2970,7 +2973,7 @@ static void __mptcp_destroy_sock(struct sock *sk)

sk->sk_prot->destroy(sk);

- WARN_ON_ONCE(msk->rmem_fwd_alloc);
+ WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -3204,8 +3207,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,

WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
+ WRITE_ONCE(msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd);
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);


--
2.43.0