[PATCH net-next 22/37] rxrpc: Store the DATA serial in the txqueue and use this in RTT calc

From: David Howells
Date: Mon Dec 02 2024 - 09:41:30 EST


Store the serial number set on a DATA packet at the point of transmission
in the rxrpc_txqueue struct and when an ACK is received, match the
reference number in the ACK by trawling the txqueue rather than sharing an
RTT table with ACK RTT. This can be done as part of Tx queue rotation.

This means we have a lot more RTT samples available and is faster to search
with all the serial numbers packed together into a few cachelines rather
than being hung off different txbufs.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Marc Dionne <marc.dionne@xxxxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: linux-afs@xxxxxxxxxxxxxxxxxxx
cc: netdev@xxxxxxxxxxxxxxx
---
include/trace/events/rxrpc.h | 14 ++----
net/rxrpc/ar-internal.h | 4 ++
net/rxrpc/call_event.c | 8 +--
net/rxrpc/input.c | 94 +++++++++++++++++++++++-------------
net/rxrpc/output.c | 6 ++-
5 files changed, 79 insertions(+), 47 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 609522a5bd0f..798bea0853c4 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -337,11 +337,10 @@
E_(rxrpc_rtt_tx_ping, "PING")

#define rxrpc_rtt_rx_traces \
- EM(rxrpc_rtt_rx_other_ack, "OACK") \
+ EM(rxrpc_rtt_rx_data_ack, "DACK") \
EM(rxrpc_rtt_rx_obsolete, "OBSL") \
EM(rxrpc_rtt_rx_lost, "LOST") \
- EM(rxrpc_rtt_rx_ping_response, "PONG") \
- E_(rxrpc_rtt_rx_requested_ack, "RACK")
+ E_(rxrpc_rtt_rx_ping_response, "PONG")

#define rxrpc_timer_traces \
EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \
@@ -1695,10 +1694,9 @@ TRACE_EVENT(rxrpc_retransmit,
);

TRACE_EVENT(rxrpc_congest,
- TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
- rxrpc_serial_t ack_serial),
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary),

- TP_ARGS(call, summary, ack_serial),
+ TP_ARGS(call, summary),

TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1706,7 +1704,6 @@ TRACE_EVENT(rxrpc_congest,
__field(rxrpc_seq_t, hard_ack)
__field(rxrpc_seq_t, top)
__field(rxrpc_seq_t, lowest_nak)
- __field(rxrpc_serial_t, ack_serial)
__field(u16, nr_sacks)
__field(u16, nr_snacks)
__field(u16, cwnd)
@@ -1722,7 +1719,6 @@ TRACE_EVENT(rxrpc_congest,
__entry->hard_ack = call->acks_hard_ack;
__entry->top = call->tx_top;
__entry->lowest_nak = call->acks_lowest_nak;
- __entry->ack_serial = ack_serial;
__entry->nr_sacks = call->acks_nr_sacks;
__entry->nr_snacks = call->acks_nr_snacks;
__entry->cwnd = call->cong_cwnd;
@@ -1734,7 +1730,7 @@ TRACE_EVENT(rxrpc_congest,

TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
__entry->call,
- __entry->ack_serial,
+ __entry->sum.acked_serial,
__print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
__entry->hard_ack,
__print_symbolic(__entry->ca_state, rxrpc_ca_states),
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 9a5eb6fa1dd1..e68d0ecc4866 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -769,6 +769,7 @@ struct rxrpc_call {
* Summary of a new ACK and the changes it made to the Tx buffer packet states.
*/
struct rxrpc_ack_summary {
+ rxrpc_serial_t acked_serial; /* Serial number ACK'd */
u16 in_flight; /* Number of unreceived transmissions */
u16 nr_new_hacks; /* Number of rotated new ACKs */
u16 nr_new_sacks; /* Number of new soft ACKs in packet */
@@ -777,6 +778,7 @@ struct rxrpc_ack_summary {
bool new_low_snack:1; /* T if new low soft NACK found */
bool retrans_timeo:1; /* T if reTx due to timeout happened */
bool need_retransmit:1; /* T if we need transmission */
+ bool rtt_sample_avail:1; /* T if RTT sample available */
u8 /*enum rxrpc_congest_change*/ change;
};

@@ -859,12 +861,14 @@ struct rxrpc_txqueue {
unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */
unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */
unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */
+ unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */

/* The arrays we want to pack into as few cache lines as possible. */
struct {
#define RXRPC_NR_TXQUEUE BITS_PER_LONG
#define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1)
struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE];
+ unsigned int segment_serial[RXRPC_NR_TXQUEUE];
unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE];
} ____cacheline_aligned;
};
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 887470fb28a4..48bc06842f99 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -159,11 +159,11 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_
rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted);

_debug("unrep %x-%x", start, stop);
- for (rxrpc_seq_t seq = start; before(seq, stop); seq++) {
- struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];
+ for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) {
+ rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK];

if (ping_response &&
- before(txb->serial, call->acks_highest_serial))
+ before(serial, call->acks_highest_serial))
break; /* Wasn't accounted for by a more recent ping. */
req.tq = tq;
req.seq = seq;
@@ -197,7 +197,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_

_debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now));
call->resend_at = resend_at;
- trace_rxrpc_timer_set(call, resend_at - req.now,
+ trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now),
rxrpc_timer_trace_resend_reset);
} else {
call->resend_at = KTIME_MAX;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 5aadc087794e..5226d26294db 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -30,9 +30,7 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
* Do TCP-style congestion management [RFC 5681].
*/
static void rxrpc_congestion_management(struct rxrpc_call *call,
- struct sk_buff *skb,
- struct rxrpc_ack_summary *summary,
- rxrpc_serial_t acked_serial)
+ struct rxrpc_ack_summary *summary)
{
summary->change = rxrpc_cong_no_change;
summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks;
@@ -44,7 +42,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
if (call->cong_cwnd >= call->cong_ssthresh &&
call->cong_ca_state == RXRPC_CA_SLOW_START) {
call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
- call->cong_tstamp = skb->tstamp;
+ call->cong_tstamp = call->acks_latest_ts;
call->cong_cumul_acks = 0;
}
}
@@ -62,7 +60,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
call->cong_cwnd += 1;
if (call->cong_cwnd >= call->cong_ssthresh) {
call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
- call->cong_tstamp = skb->tstamp;
+ call->cong_tstamp = call->acks_latest_ts;
}
goto out;

@@ -75,12 +73,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
*/
if (call->peer->rtt_count == 0)
goto out;
- if (ktime_before(skb->tstamp,
+ if (ktime_before(call->acks_latest_ts,
ktime_add_us(call->cong_tstamp,
call->peer->srtt_us >> 3)))
goto out_no_clear_ca;
summary->change = rxrpc_cong_rtt_window_end;
- call->cong_tstamp = skb->tstamp;
+ call->cong_tstamp = call->acks_latest_ts;
if (call->cong_cumul_acks >= call->cong_cwnd)
call->cong_cwnd++;
goto out;
@@ -137,7 +135,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
summary->change = rxrpc_cong_cleared_nacks;
call->cong_dup_acks = 0;
call->cong_extra = 0;
- call->cong_tstamp = skb->tstamp;
+ call->cong_tstamp = call->acks_latest_ts;
if (call->cong_cwnd < call->cong_ssthresh)
call->cong_ca_state = RXRPC_CA_SLOW_START;
else
@@ -147,7 +145,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
out_no_clear_ca:
if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW)
call->cong_cwnd = RXRPC_TX_MAX_WINDOW;
- trace_rxrpc_congest(call, summary, acked_serial);
+ trace_rxrpc_congest(call, summary);
return;

packet_loss_detected:
@@ -194,11 +192,29 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND);
}

+/*
+ * Add an RTT sample derived from an ACK'd DATA packet.
+ */
+static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ int ix,
+ rxrpc_serial_t ack_serial)
+{
+ rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1,
+ summary->acked_serial, ack_serial,
+ ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]),
+ call->acks_latest_ts);
+ summary->rtt_sample_avail = false;
+ __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */
+}
+
/*
* Apply a hard ACK by advancing the Tx window.
*/
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
- struct rxrpc_ack_summary *summary)
+ struct rxrpc_ack_summary *summary,
+ rxrpc_serial_t ack_serial)
{
struct rxrpc_txqueue *tq = call->tx_queue;
rxrpc_seq_t seq = call->tx_bottom + 1;
@@ -236,6 +252,11 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
rot_last = true;
}

+ if (summary->rtt_sample_avail &&
+ summary->acked_serial == tq->segment_serial[ix] &&
+ test_bit(ix, &tq->rtt_samples))
+ rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial);
+
if (ix == tq->nr_reported_acks) {
/* Packet directly hard ACK'd. */
tq->nr_reported_acks++;
@@ -348,7 +369,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
}

if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
- if (!rxrpc_rotate_tx_window(call, top, &summary)) {
+ if (!rxrpc_rotate_tx_window(call, top, &summary, 0)) {
rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
return false;
}
@@ -800,6 +821,19 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
})
#endif

+/*
+ * Deal with RTT samples from soft ACKs.
+ */
+static void rxrpc_input_soft_rtt(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ rxrpc_serial_t ack_serial)
+{
+ for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++)
+ if (summary->acked_serial == tq->segment_serial[ix])
+ return rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial);
+}
+
/*
* Process a batch of soft ACKs specific to a transmission queue segment.
*/
@@ -909,6 +943,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,

_debug("bound %16lx %u", extracted, nr);

+ if (summary->rtt_sample_avail)
+ rxrpc_input_soft_rtt(call, summary, tq, sp->hdr.serial);
rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE,
seq - RXRPC_NR_TXQUEUE, &lowest_nak);
extracted = ~0UL;
@@ -980,7 +1016,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_ack_summary summary = { 0 };
struct rxrpc_acktrailer trailer;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- rxrpc_serial_t ack_serial, acked_serial;
+ rxrpc_serial_t ack_serial;
rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;

@@ -989,11 +1025,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);

ack_serial = sp->hdr.serial;
- acked_serial = sp->ack.acked_serial;
first_soft_ack = sp->ack.first_ack;
prev_pkt = sp->ack.prev_ack;
nr_acks = sp->ack.nr_acks;
hard_ack = first_soft_ack - 1;
+ summary.acked_serial = sp->ack.acked_serial;
summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
sp->ack.reason : RXRPC_ACK__INVALID);

@@ -1001,21 +1037,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
prefetch(call->tx_queue);

- if (acked_serial != 0) {
- switch (summary.ack_reason) {
- case RXRPC_ACK_PING_RESPONSE:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_ping_response);
- break;
- case RXRPC_ACK_REQUESTED:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_requested_ack);
- break;
- default:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_other_ack);
- break;
- }
+ if (summary.acked_serial != 0) {
+ if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE)
+ rxrpc_complete_rtt_probe(call, skb->tstamp, summary.acked_serial,
+ ack_serial, rxrpc_rtt_rx_ping_response);
+ else
+ summary.rtt_sample_avail = true;
}

/* If we get an EXCEEDS_WINDOW ACK from the server, it probably
@@ -1068,8 +1095,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_ACK_PING:
break;
default:
- if (acked_serial && after(acked_serial, call->acks_highest_serial))
- call->acks_highest_serial = acked_serial;
+ if (summary.acked_serial &&
+ after(summary.acked_serial, call->acks_highest_serial))
+ call->acks_highest_serial = summary.acked_serial;
break;
}

@@ -1098,7 +1126,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);

if (after(hard_ack, call->tx_bottom)) {
- if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
+ if (rxrpc_rotate_tx_window(call, hard_ack, &summary, ack_serial)) {
rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
goto send_response;
}
@@ -1116,7 +1144,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ping(call, ack_serial,
rxrpc_propose_ack_ping_for_lost_reply);

- rxrpc_congestion_management(call, skb, &summary, acked_serial);
+ rxrpc_congestion_management(call, &summary);
if (summary.need_retransmit)
rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE);

@@ -1136,7 +1164,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };

- if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
+ if (rxrpc_rotate_tx_window(call, call->tx_top, &summary, 0))
rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
}

diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5387bf9b0015..50798a641e37 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -435,7 +435,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
if (why != rxrpc_reqack_no_srv_last) {
flags |= RXRPC_REQUEST_ACK;
- rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data);
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
call->peer->rtt_last_req = req->now;
}
dont_set_request_ack:
@@ -507,6 +507,10 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se

_debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);
tq->segment_xmit_ts[ix] = xmit_ts;
+ tq->segment_serial[ix] = serial;
+ if (i + 1 == req->n)
+ /* Only sample the last subpacket in a jumbo. */
+ __set_bit(ix, &tq->rtt_samples);
len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i);
serial++;
seq++;