Re: [PATCH net-next 05/11] tcp: allow mptcp to drop TS for some packets
From: Eric Dumazet
Date: Mon Jun 01 2026 - 02:05:24 EST
On Sun, May 31, 2026 at 10:25 PM Matthieu Baerts (NGI0)
<matttbe@xxxxxxxxxx> wrote:
>
> With TCP-timestamps (padded) taking 12 bytes and ADD_ADDR IPv6 + port
> taking 30 bytes, the 40-byte limit for the TCP options is reached. In
> this case, it is then not possible to send the address signal.
>
> The idea is to let MPTCP drop the TCP-timestamps option for some
> specific packets, to be able to send specific pure ACKs carrying >28
> bytes of MPTCP options, like with this specific ADD_ADDR. A new
> parameter is passed from tcp_established_options to the MPTCP side to
> indicate if the TCP TS option is used, and if it should be dropped. The
> next commit implements the part on MPTCP side, but split into two
> patches to help TCP maintainers to identify the modifications on TCP
> side. This feature will be controlled by a new add_addr_v6_port_drop_ts
> MPTCP sysctl knob.
>
> It is important to keep in mind that dropping the TCP timestamps option
> for one packet of the connection could eventually disrupt some
> middleboxes: even if it should be unlikely, they could drop the packet
> or even block the connection. That's why this new feature will be
> controlled by a sysctl knob.
>
> Note that it would be technically possible to squeeze both options into
> the header if the ADD_ADDR is first written, and then the TCP timestamps
> without the NOPs preceding it. But this means more modifications on TCP
> side, plus some middleboxes could still be disrupted by that.
>
> About the implementation, instead of passing a new boolean (drop_ts),
> another option would be to pass the whole option structure (opts),
> but 'struct tcp_out_options' is currently defined in tcp_output.c, and
> would need to be exported. Plus that means the removal of the TCP TS
> option would be done on the MPTCP side, and not here on the TCP side.
> It feels clearer to remove other TCP options from the TCP side, than
> hiding that from the MPTCP side.
>
> Yet another alternative would be to pass the size already taken by the
> other TCP options, and have a way to drop them all when needed. But it
> feels better to target only the timestamps option where dropping it
> should be safe, even if it is currently the only option that would be
> set before MPTCP, when MPTCP is used.
>
> Reviewed-by: Mat Martineau <martineau@xxxxxxxxxx>
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@xxxxxxxxxx>
> ---
> To: Neal Cardwell <ncardwell@xxxxxxxxxx>
> To: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
> ---
> include/net/mptcp.h | 3 ++-
> net/ipv4/tcp_output.c | 6 +++++-
> net/mptcp/options.c | 3 ++-
> 3 files changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index f7263fe2a2e4..000b6593bfa4 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -151,7 +151,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
> struct mptcp_out_options *opts);
> bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> unsigned int *size, unsigned int remaining,
> - struct mptcp_out_options *opts);
> + bool *drop_ts, struct mptcp_out_options *opts);
> bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
>
> void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> @@ -270,6 +270,7 @@ static inline bool mptcp_established_options(struct sock *sk,
> struct sk_buff *skb,
> unsigned int *size,
> unsigned int remaining,
> + bool *drop_ts,
> struct mptcp_out_options *opts)
> {
> return false;
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index ef0c10cd31c7..53ee4c8f5f8c 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1181,12 +1181,16 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
> */
> if (sk_is_mptcp(sk)) {
> unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
> + bool drop_ts = opts->options & OPTION_TS;
> unsigned int opt_size = 0;
>
> if (mptcp_established_options(sk, skb, &opt_size, remaining,
> - &opts->mptcp)) {
> + &drop_ts, &opts->mptcp)) {
> opts->options |= OPTION_MPTCP;
> size += opt_size;
> +
> + if (drop_ts)
> + opts->options &= ~OPTION_TS;
> }
> }
Passing local variables' addresses forces the compiler to use a stack
canary in this hot function, even for non-MPTCP flows.
I was about to test the following patch, which removes the current
stack canary caused by MPTCP :/
$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-92 (-92)
Function old new delta
tcp_options_write.isra 1423 1407 -16
mptcp_established_options 2746 2720 -26
tcp_established_options 553 503 -50
Total: Before=22110750, After=22110658, chg -0.00%
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index f7263fe2a2e40b507257c3720cc2d78d37357d6d..f55838fd6cca308908607243735f8768540bb419
100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -149,9 +149,9 @@ bool mptcp_syn_options(struct sock *sk, const
struct sk_buff *skb,
unsigned int *size, struct mptcp_out_options *opts);
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
struct mptcp_out_options *opts);
-bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
- unsigned int *size, unsigned int remaining,
- struct mptcp_out_options *opts);
+u32 mptcp_established_options(struct sock *sk, struct sk_buff *skb,
+ unsigned int remaining,
+ struct mptcp_out_options *opts);
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
@@ -266,13 +266,13 @@ static inline bool mptcp_synack_options(const
struct request_sock *req,
return false;
}
-static inline bool mptcp_established_options(struct sock *sk,
- struct sk_buff *skb,
- unsigned int *size,
- unsigned int remaining,
- struct mptcp_out_options *opts)
+static inline u32 mptcp_established_options(struct sock *sk,
+ struct sk_buff *skb,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
{
- return false;
+ return 0;
}
static inline bool mptcp_incoming_options(struct sock *sk,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ef0c10cd31c71ff585a937fde37f2b08b1214b5a..594ec6ba02d5413d43842f79aefbf4d8355c4f3f
100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1183,10 +1183,11 @@ static unsigned int
tcp_established_options(struct sock *sk, struct sk_buff *skb
unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
unsigned int opt_size = 0;
- if (mptcp_established_options(sk, skb, &opt_size, remaining,
- &opts->mptcp)) {
+ opt_size = mptcp_established_options(sk, skb, remaining,
+ &opts->mptcp);
+ if (opt_size) {
opts->options |= OPTION_MPTCP;
- size += opt_size;
+ size += (opt_size & 63);
}
}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index d96130e49942e2fb878cd1897ad43c1d420fb233..503ebd71d562134431cf0ea33276c035bddae00c
100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -49,7 +49,7 @@ static struct mptcp_pernet *mptcp_get_pernet(const
struct net *net)
int mptcp_is_enabled(const struct net *net)
{
- return mptcp_get_pernet(net)->mptcp_enabled;
+ return READ_ONCE(mptcp_get_pernet(net)->mptcp_enabled);
}
unsigned int mptcp_get_add_addr_timeout(const struct net *net)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8a1c5698983cff3082d68290626dd8f1e044527f..4ac01cecb6bd965f1f95f6f2342515eb2b7591f5
100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -836,15 +836,15 @@ static bool
mptcp_established_options_mp_fail(struct sock *sk,
return true;
}
-bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
- unsigned int *size, unsigned int remaining,
- struct mptcp_out_options *opts)
+u32 mptcp_established_options(struct sock *sk, struct sk_buff *skb,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
unsigned int opt_size = 0;
+ u32 total_size = 0;
bool snd_data_fin;
- bool ret = false;
opts->suboptions = 0;
@@ -852,34 +852,33 @@ bool mptcp_established_options(struct sock *sk,
struct sk_buff *skb,
* option space.
*/
if (unlikely(__mptcp_check_fallback(msk) &&
!mptcp_check_infinite_map(skb)))
- return true;
+ return 64;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
if (mptcp_established_options_fastclose(sk, &opt_size,
remaining, opts) ||
mptcp_established_options_mp_fail(sk, &opt_size,
remaining, opts)) {
- *size += opt_size;
+ total_size += opt_size;
remaining -= opt_size;
}
/* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if
there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size,
remaining, opts)) {
- *size += opt_size;
+ total_size += opt_size;
remaining -= opt_size;
}
- return true;
+ return 64 + total_size;;
}
snd_data_fin = mptcp_data_fin_enabled(msk);
if (mptcp_established_options_mp(sk, skb, snd_data_fin,
&opt_size, opts))
- ret = true;
+ total_size += 64;
else if (mptcp_established_options_dss(sk, skb, snd_data_fin,
&opt_size, opts)) {
unsigned int mp_fail_size;
- ret = true;
if (mptcp_established_options_mp_fail(sk, &mp_fail_size,
remaining -
opt_size, opts)) {
- *size += opt_size + mp_fail_size;
+ total_size += opt_size + mp_fail_size;
remaining -= opt_size - mp_fail_size;
- return true;
+ return total_size;
}
}
@@ -887,27 +886,24 @@ bool mptcp_established_options(struct sock *sk,
struct sk_buff *skb,
* TCP option space would be fatal
*/
if (WARN_ON_ONCE(opt_size > remaining))
- return false;
+ return 0;
- *size += opt_size;
+ total_size += opt_size;
remaining -= opt_size;
if (mptcp_established_options_add_addr(sk, skb, &opt_size,
remaining, opts)) {
- *size += opt_size;
+ total_size += opt_size;
remaining -= opt_size;
- ret = true;
} else if (mptcp_established_options_rm_addr(sk, &opt_size,
remaining, opts)) {
- *size += opt_size;
+ total_size += opt_size;
remaining -= opt_size;
- ret = true;
}
if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) {
- *size += opt_size;
+ total_size += opt_size;
remaining -= opt_size;
- ret = true;
}
- return ret;
+ return total_size;
}
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,