[PATCH net 03/10] mptcp: close TOCTOU race while computing rcv_wnd

From: Matthieu Baerts (NGI0)

Date: Sun May 31 2026 - 23:14:42 EST


From: Paolo Abeni <pabeni@xxxxxxxxxx>

The MPTCP output path access locklessly the MPTCP-level ack_seq
in multiple times, using possibly different values for the data_ack
in the DSS option and to compute the announced rcv wnd for the same
packet.

Refactor the cote to avoid inconsistencies which may confuse the
peer. Also ensure that the MPTCP level rcv wnd is updated only when
the egress packet actually contains a DSS ack.

Fixes: fa3fe2b15031 ("mptcp: track window announced to peer")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Paolo Abeni <pabeni@xxxxxxxxxx>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@xxxxxxxxxx>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@xxxxxxxxxx>
---
net/mptcp/options.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8a1c5698983c..5c228344e83f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -570,7 +570,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
- u64 ack_seq;

opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
@@ -601,14 +600,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret;
}

- ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
- opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
- opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@@ -1297,19 +1293,14 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}

-static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
+static u64 mptcp_set_rwin(struct mptcp_sock *msk, struct tcp_sock *tp,
+ struct tcphdr *th, u64 ack_seq)
{
const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
- u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
- struct mptcp_sock *msk;
+ u64 rcv_wnd_old, rcv_wnd_new;
u32 new_win;
u64 win;

- subflow = mptcp_subflow_ctx(ssk);
- msk = mptcp_sk(subflow->conn);
-
- ack_seq = READ_ONCE(msk->ack_seq);
rcv_wnd_new = ack_seq + tp->rcv_wnd;

rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
@@ -1362,7 +1353,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)

update_wspace:
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
- subflow->rcv_wnd_sent = rcv_wnd_new;
+ return rcv_wnd_new;
}

static void mptcp_track_rwin(struct tcp_sock *tp)
@@ -1474,13 +1465,25 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);

if (mpext->use_ack) {
+ struct mptcp_sock *msk;
+ u64 ack_seq;
+
+ /* DSS option is set only by mptcp_established_option,
+ * the caller is __tcp_transmit_skb() and ssk is always
+ * not NULL.
+ */
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+ ack_seq = READ_ONCE(msk->ack_seq);
if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
+ put_unaligned_be64(ack_seq, ptr);
ptr += 2;
} else {
- put_unaligned_be32(mpext->data_ack32, ptr);
+ put_unaligned_be32(ack_seq, ptr);
ptr += 1;
}
+ subflow->rcv_wnd_sent = mptcp_set_rwin(msk, tp, th,
+ ack_seq);
}

if (mpext->use_map) {
@@ -1708,9 +1711,6 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
i += 4;
}
}
-
- if (tp)
- mptcp_set_rwin(tp, th);
}

__be32 mptcp_get_reset_option(const struct sk_buff *skb)

--
2.53.0