[PATCH net-next 21/37] rxrpc: Use the new rxrpc_tx_queue struct to more efficiently process ACKs

From: David Howells
Date: Mon Dec 02 2024 - 09:49:37 EST


With the change in the structure of the transmission buffer to store
buffers in bunches of 32 or 64 (BITS_PER_LONG), we can place sets of
per-buffer flags into the rxrpc_txqueue struct rather than storing them in
rxrpc_txbuf, thereby vastly increasing efficiency when assessing the SACK
table in an ACK packet.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Marc Dionne <marc.dionne@xxxxxxxxxxxx>
cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
cc: Eric Dumazet <edumazet@xxxxxxxxxx>
cc: Jakub Kicinski <kuba@xxxxxxxxxx>
cc: Paolo Abeni <pabeni@xxxxxxxxxx>
cc: linux-afs@xxxxxxxxxxxxxxxxxxx
cc: netdev@xxxxxxxxxxxxxxx
---
include/trace/events/rxrpc.h | 86 ++++++++++--
net/rxrpc/ar-internal.h | 21 ++-
net/rxrpc/call_event.c | 180 ++++++++++++-------------
net/rxrpc/call_object.c | 1 -
net/rxrpc/input.c | 252 ++++++++++++++++++++++-------------
net/rxrpc/output.c | 10 +-
net/rxrpc/sendmsg.c | 3 +
7 files changed, 349 insertions(+), 204 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index d47b8235fad3..609522a5bd0f 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -132,7 +132,6 @@
EM(rxrpc_skb_get_call_rx, "GET call-rx ") \
EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \
EM(rxrpc_skb_get_conn_work, "GET conn-work") \
- EM(rxrpc_skb_get_last_nack, "GET last-nack") \
EM(rxrpc_skb_get_local_work, "GET locl-work") \
EM(rxrpc_skb_get_reject_work, "GET rej-work ") \
EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \
@@ -147,7 +146,6 @@
EM(rxrpc_skb_put_error_report, "PUT error-rep") \
EM(rxrpc_skb_put_input, "PUT input ") \
EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \
- EM(rxrpc_skb_put_last_nack, "PUT last-nack") \
EM(rxrpc_skb_put_purge, "PUT purge ") \
EM(rxrpc_skb_put_rotate, "PUT rotate ") \
EM(rxrpc_skb_put_unknown, "PUT unknown ") \
@@ -499,6 +497,11 @@
EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \
E_(rxrpc_pmtud_reduce_route, "Route")

+#define rxrpc_rotate_traces \
+ EM(rxrpc_rotate_trace_hack, "hard-ack") \
+ EM(rxrpc_rotate_trace_sack, "soft-ack") \
+ E_(rxrpc_rotate_trace_snak, "soft-nack")
+
/*
* Generate enums for tracing information.
*/
@@ -525,6 +528,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte);
enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte);
enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte);
enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte);
+enum rxrpc_rotate_trace { rxrpc_rotate_traces } __mode(byte);
enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte);
enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte);
@@ -562,6 +566,7 @@ rxrpc_propose_ack_traces;
rxrpc_receive_traces;
rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;
+rxrpc_rotate_traces;
rxrpc_rtt_rx_traces;
rxrpc_rtt_tx_traces;
rxrpc_sack_traces;
@@ -1667,6 +1672,7 @@ TRACE_EVENT(rxrpc_retransmit,

TP_STRUCT__entry(
__field(unsigned int, call)
+ __field(unsigned int, qbase)
__field(rxrpc_seq_t, seq)
__field(rxrpc_serial_t, serial)
__field(ktime_t, expiry)
@@ -1674,13 +1680,15 @@ TRACE_EVENT(rxrpc_retransmit,

TP_fast_assign(
__entry->call = call->debug_id;
+ __entry->qbase = req->tq->qbase;
__entry->seq = req->seq;
__entry->serial = txb->serial;
__entry->expiry = expiry;
),

- TP_printk("c=%08x q=%x r=%x xp=%lld",
+ TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld",
__entry->call,
+ __entry->qbase,
__entry->seq,
__entry->serial,
ktime_to_us(__entry->expiry))
@@ -1724,7 +1732,7 @@ TRACE_EVENT(rxrpc_congest,
memcpy(&__entry->sum, summary, sizeof(__entry->sum));
),

- TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s",
+ TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
__entry->call,
__entry->ack_serial,
__print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
@@ -1732,9 +1740,9 @@ TRACE_EVENT(rxrpc_congest,
__print_symbolic(__entry->ca_state, rxrpc_ca_states),
__entry->cwnd,
__entry->ssthresh,
- __entry->nr_sacks, __entry->sum.nr_retained_snacks,
- __entry->sum.nr_new_sacks,
- __entry->sum.nr_new_snacks,
+ __entry->nr_sacks, __entry->sum.nr_new_sacks,
+ __entry->nr_snacks, __entry->sum.nr_new_snacks,
+ __entry->sum.nr_new_hacks,
__entry->top - __entry->hard_ack,
__entry->cumul_acks,
__entry->dup_acks,
@@ -1850,10 +1858,36 @@ TRACE_EVENT(rxrpc_connect_call,
&__entry->srx.transport)
);

+TRACE_EVENT(rxrpc_apply_acks,
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq),
+
+ TP_ARGS(call, tq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(unsigned int, nr_rep)
+ __field(rxrpc_seq_t, qbase)
+ __field(unsigned long, acks)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->acks = tq->segment_acked;
+ __entry->nr_rep = tq->nr_reported_acks;
+ ),
+
+ TP_printk("c=%08x tq=%x acks=%016lx rep=%u",
+ __entry->call,
+ __entry->qbase,
+ __entry->acks,
+ __entry->nr_rep)
+ );
+
TRACE_EVENT(rxrpc_resend,
- TP_PROTO(struct rxrpc_call *call, struct sk_buff *ack),
+ TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t ack_serial),

- TP_ARGS(call, ack),
+ TP_ARGS(call, ack_serial),

TP_STRUCT__entry(
__field(unsigned int, call)
@@ -1863,11 +1897,10 @@ TRACE_EVENT(rxrpc_resend,
),

TP_fast_assign(
- struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL;
__entry->call = call->debug_id;
__entry->seq = call->acks_hard_ack;
__entry->transmitted = call->tx_transmitted;
- __entry->ack_serial = sp ? sp->hdr.serial : 0;
+ __entry->ack_serial = ack_serial;
),

TP_printk("c=%08x r=%x q=%x tq=%x",
@@ -1877,6 +1910,37 @@ TRACE_EVENT(rxrpc_resend,
__entry->transmitted)
);

+TRACE_EVENT(rxrpc_rotate,
+ TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq,
+ struct rxrpc_ack_summary *summary, rxrpc_seq_t seq,
+ enum rxrpc_rotate_trace trace),
+
+ TP_ARGS(call, tq, summary, seq, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, qbase)
+ __field(rxrpc_seq_t, seq)
+ __field(unsigned int, nr_rep)
+ __field(enum rxrpc_rotate_trace, trace)
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->qbase = tq->qbase;
+ __entry->seq = seq;
+ __entry->nr_rep = tq->nr_reported_acks;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("c=%08x tq=%x q=%x nr=%x %s",
+ __entry->call,
+ __entry->qbase,
+ __entry->seq,
+ __entry->nr_rep,
+ __print_symbolic(__entry->trace, rxrpc_rotate_traces))
+ );
+
TRACE_EVENT(rxrpc_rx_icmp,
TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee,
struct sockaddr_rxrpc *srx),
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index f6e6b2ab6c2a..9a5eb6fa1dd1 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -214,9 +214,8 @@ struct rxrpc_skb_priv {
rxrpc_seq_t first_ack; /* First packet in acks table */
rxrpc_seq_t prev_ack; /* Highest seq seen */
rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */
+ u16 nr_acks; /* Number of acks+nacks */
u8 reason; /* Reason for ack */
- u8 nr_acks; /* Number of acks+nacks */
- u8 nr_nacks; /* Number of nacks */
} ack;
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
@@ -734,7 +733,6 @@ struct rxrpc_call {
u16 cong_dup_acks; /* Count of ACKs showing missing packets */
u16 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
- struct sk_buff *cong_last_nack; /* Last ACK with nacks received */

/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
@@ -775,11 +773,10 @@ struct rxrpc_ack_summary {
u16 nr_new_hacks; /* Number of rotated new ACKs */
u16 nr_new_sacks; /* Number of new soft ACKs in packet */
u16 nr_new_snacks; /* Number of new soft nacks in packet */
- u16 nr_retained_snacks; /* Number of nacks retained between ACKs */
u8 ack_reason;
- bool saw_snacks:1; /* T if we saw a soft NACK */
bool new_low_snack:1; /* T if new low soft NACK found */
bool retrans_timeo:1; /* T if reTx due to timeout happened */
+ bool need_retransmit:1; /* T if we need transmission */
u8 /*enum rxrpc_congest_change*/ change;
};

@@ -858,6 +855,10 @@ struct rxrpc_txqueue {
struct rxrpc_txqueue *next;
ktime_t xmit_ts_base;
rxrpc_seq_t qbase;
+ u8 nr_reported_acks; /* Number of segments explicitly acked/nacked */
+ unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */
+ unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */
+ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */

/* The arrays we want to pack into as few cache lines as possible. */
struct {
@@ -935,7 +936,7 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
-void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
+void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response);

bool rxrpc_input_call_event(struct rxrpc_call *call);

@@ -1382,6 +1383,14 @@ static inline bool after_eq(u32 seq1, u32 seq2)
{
return (s32)(seq1 - seq2) >= 0;
}
+static inline u32 earliest(u32 seq1, u32 seq2)
+{
+ return before(seq1, seq2) ? seq1 : seq2;
+}
+static inline u32 latest(u32 seq1, u32 seq2)
+{
+ return after(seq1, seq2) ? seq1 : seq2;
+}

static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 866d415801a9..887470fb28a4 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -65,9 +65,9 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
/*
* Retransmit one or more packets.
*/
-static void rxrpc_retransmit_data(struct rxrpc_call *call,
+static bool rxrpc_retransmit_data(struct rxrpc_call *call,
struct rxrpc_send_data_req *req,
- ktime_t rto)
+ ktime_t rto, bool skip_too_young)
{
struct rxrpc_txqueue *tq = req->tq;
unsigned int ix = req->seq & RXRPC_TXQ_MASK;
@@ -78,9 +78,11 @@ static void rxrpc_retransmit_data(struct rxrpc_call *call,

xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
resend_at = ktime_add(xmit_ts, rto);
- trace_rxrpc_retransmit(call, req, txb,
- ktime_sub(resend_at, req->now));
+ trace_rxrpc_retransmit(call, req, txb, ktime_sub(resend_at, req->now));
+ if (skip_too_young && ktime_after(resend_at, req->now))
+ return false;

+ __set_bit(ix, &tq->segment_retransmitted);
txb->flags |= RXRPC_TXBUF_RESENT;
rxrpc_send_data_packet(call, req);
rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
@@ -89,128 +91,118 @@ static void rxrpc_retransmit_data(struct rxrpc_call *call,
req->n = 0;
req->did_send = true;
req->now = ktime_get_real();
+ return true;
}

/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
+void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response)
{
struct rxrpc_send_data_req req = {
.now = ktime_get_real(),
};
- struct rxrpc_ackpacket *ack = NULL;
- struct rxrpc_skb_priv *sp;
- struct rxrpc_txqueue *tq;
- struct rxrpc_txbuf *txb;
- rxrpc_seq_t transmitted = call->tx_transmitted, seq;
- ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
- ktime_t resend_at = KTIME_MAX, delay;
- bool unacked = false, did_send = false;
- unsigned int qix;
+ struct rxrpc_txqueue *tq = call->tx_queue;
+ ktime_t lowest_xmit_ts = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
+ bool unacked = false;

_enter("{%d,%d}", call->tx_bottom, call->tx_top);

- if (call->tx_bottom == call->tx_top)
- goto no_resend;
+ if (call->tx_bottom == call->tx_top) {
+ call->resend_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
+ return;
+ }

- trace_rxrpc_resend(call, ack_skb);
- tq = call->tx_queue;
- seq = call->tx_bottom;
+ trace_rxrpc_resend(call, ack_serial);

- /* Scan the soft ACK table and resend any explicitly NAK'd packets. */
- if (ack_skb) {
- sp = rxrpc_skb(ack_skb);
- ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
+ /* Scan the transmission queue, looking for explicitly NAK'd packets. */
+ do {
+ unsigned long naks = ~tq->segment_acked;
+ rxrpc_seq_t tq_top = tq->qbase + RXRPC_NR_TXQUEUE - 1;

- for (int i = 0; i < sp->ack.nr_acks; i++) {
- rxrpc_seq_t aseq;
+ if (after(tq->qbase, call->tx_transmitted))
+ break;

- if (ack->acks[i] & 1)
- continue;
- aseq = sp->ack.first_ack + i;
- while (after_eq(aseq, tq->qbase + RXRPC_NR_TXQUEUE))
- tq = tq->next;
- seq = aseq;
- qix = seq - tq->qbase;
- txb = tq->bufs[qix];
- if (after(seq, transmitted))
- goto no_further_resend;
-
- resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]);
- resend_at = ktime_add(resend_at, rto);
- if (after(txb->serial, call->acks_highest_serial)) {
- if (ktime_after(resend_at, req.now) &&
- ktime_before(resend_at, next_resend))
- next_resend = resend_at;
+ if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE)
+ naks &= (1UL << tq->nr_reported_acks) - 1;
+
+ _debug("retr %16lx %u c=%08x [%x]",
+ tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase);
+ _debug("nack %16lx", naks);
+
+ while (naks) {
+ unsigned int ix = __ffs(naks);
+ struct rxrpc_txbuf *txb = tq->bufs[ix];
+
+ __clear_bit(ix, &naks);
+ if (after(txb->serial, call->acks_highest_serial))
continue; /* Ack point not yet reached */
- }

rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);

req.tq = tq;
- req.seq = seq;
+ req.seq = tq->qbase + ix;
req.n = 1;
- rxrpc_retransmit_data(call, &req, rto);
-
- if (after_eq(seq, call->tx_top))
- goto no_further_resend;
+ rxrpc_retransmit_data(call, &req, rto, false);
}
- }

- /* Fast-forward through the Tx queue to the point the peer says it has
- * seen. Anything between the soft-ACK table and that point will get
- * ACK'd or NACK'd in due course, so don't worry about it here; here we
- * need to consider retransmitting anything beyond that point.
- */
- seq = call->acks_prev_seq;
- if (after_eq(seq, call->tx_transmitted))
- goto no_further_resend;
- seq++;
-
- while (after_eq(seq, tq->qbase + RXRPC_NR_TXQUEUE))
- tq = tq->next;
-
- while (before_eq(seq, call->tx_transmitted)) {
- qix = seq - tq->qbase;
- if (qix >= RXRPC_NR_TXQUEUE) {
- tq = tq->next;
- continue;
+ /* Anything after the soft-ACK table up to and including
+ * ack.previousPacket will get ACK'd or NACK'd in due course,
+ * so don't worry about those here. We do, however, need to
+ * consider retransmitting anything beyond that point.
+ */
+ if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE &&
+ after(tq_top, call->acks_prev_seq)) {
+ rxrpc_seq_t start = latest(call->acks_prev_seq,
+ tq->qbase + tq->nr_reported_acks);
+ rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted);
+
+ _debug("unrep %x-%x", start, stop);
+ for (rxrpc_seq_t seq = start; before(seq, stop); seq++) {
+ struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];
+
+ if (ping_response &&
+ before(txb->serial, call->acks_highest_serial))
+ break; /* Wasn't accounted for by a more recent ping. */
+ req.tq = tq;
+ req.seq = seq;
+ req.n = 1;
+ if (rxrpc_retransmit_data(call, &req, rto, true))
+ unacked = true;
+ }
}
- txb = tq->bufs[qix];
- resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]);
- resend_at = ktime_add(resend_at, rto);

- if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
- before(txb->serial, ntohl(ack->serial)))
- goto do_resend; /* Wasn't accounted for by a more recent ping. */
+ /* Work out the next retransmission timeout. */
+ if (ktime_before(tq->xmit_ts_base, lowest_xmit_ts)) {
+ unsigned int lowest_us = UINT_MAX;

- if (ktime_after(resend_at, req.now)) {
- if (ktime_before(resend_at, next_resend))
- next_resend = resend_at;
- seq++;
- continue;
- }
+ for (int i = 0; i < RXRPC_NR_TXQUEUE; i++)
+ if (!test_bit(i, &tq->segment_acked) &&
+ tq->segment_xmit_ts[i] < lowest_us)
+ lowest_us = tq->segment_xmit_ts[i];
+ _debug("lowest[%x] %llx %u", tq->qbase, tq->xmit_ts_base, lowest_us);

- do_resend:
- unacked = true;
+ if (lowest_us != UINT_MAX) {
+ ktime_t lowest_ns = ktime_add_us(tq->xmit_ts_base, lowest_us);
+ if (ktime_before(lowest_ns, lowest_xmit_ts))
+ lowest_xmit_ts = lowest_ns;
+ }
+ }
+ } while ((tq = tq->next));

- req.tq = tq;
- req.seq = seq;
- req.n = 1;
- rxrpc_retransmit_data(call, &req, rto);
- seq++;
- }
+ if (lowest_xmit_ts < KTIME_MAX) {
+ ktime_t delay = rxrpc_get_rto_backoff(call->peer, req.did_send);
+ ktime_t resend_at = ktime_add(lowest_xmit_ts, delay);

-no_further_resend:
-no_resend:
- if (resend_at < KTIME_MAX) {
- delay = rxrpc_get_rto_backoff(call->peer, did_send);
- resend_at = ktime_add(resend_at, delay);
+ _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now));
+ call->resend_at = resend_at;
trace_rxrpc_timer_set(call, resend_at - req.now,
rxrpc_timer_trace_resend_reset);
+ } else {
+ call->resend_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
}
- call->resend_at = resend_at;

if (unacked)
rxrpc_congestion_timeout(call);
@@ -494,7 +486,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
if (resend &&
__rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
!test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
- rxrpc_resend(call, NULL);
+ rxrpc_resend(call, 0, false);

if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index a9682b31a4f9..bba058055c97 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -691,7 +691,6 @@ static void rxrpc_destroy_call(struct work_struct *work)

del_timer_sync(&call->timer);

- rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
rxrpc_cleanup_tx_buffers(call);
rxrpc_cleanup_rx_buffers(call);
rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 226da6ecb91d..5aadc087794e 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -34,8 +34,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
struct rxrpc_ack_summary *summary,
rxrpc_serial_t acked_serial)
{
- bool resend = false;
-
summary->change = rxrpc_cong_no_change;
summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks;

@@ -52,12 +50,13 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}

call->cong_cumul_acks += summary->nr_new_sacks;
+ call->cong_cumul_acks += summary->nr_new_hacks;
if (call->cong_cumul_acks > 255)
call->cong_cumul_acks = 255;

switch (call->cong_ca_state) {
case RXRPC_CA_SLOW_START:
- if (summary->saw_snacks)
+ if (call->acks_nr_snacks > 0)
goto packet_loss_detected;
if (call->cong_cumul_acks > 0)
call->cong_cwnd += 1;
@@ -68,7 +67,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
goto out;

case RXRPC_CA_CONGEST_AVOIDANCE:
- if (summary->saw_snacks)
+ if (call->acks_nr_snacks > 0)
goto packet_loss_detected;

/* We analyse the number of packets that get ACK'd per RTT
@@ -87,7 +86,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
goto out;

case RXRPC_CA_PACKET_LOSS:
- if (!summary->saw_snacks)
+ if (call->acks_nr_snacks == 0)
goto resume_normality;

if (summary->new_low_snack) {
@@ -108,7 +107,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
call->cong_cwnd = call->cong_ssthresh + 3;
call->cong_extra = 0;
call->cong_dup_acks = 0;
- resend = true;
+ summary->need_retransmit = true;
goto out;

case RXRPC_CA_FAST_RETRANSMIT:
@@ -119,12 +118,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
if (call->cong_dup_acks == 2) {
summary->change = rxrpc_cong_retransmit_again;
call->cong_dup_acks = 0;
- resend = true;
+ summary->need_retransmit = true;
}
} else {
summary->change = rxrpc_cong_progress;
call->cong_cwnd = call->cong_ssthresh;
- if (!summary->saw_snacks)
+ if (call->acks_nr_snacks == 0)
goto resume_normality;
}
goto out;
@@ -149,8 +148,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW)
call->cong_cwnd = RXRPC_TX_MAX_WINDOW;
trace_rxrpc_congest(call, summary, acked_serial);
- if (resend)
- rxrpc_resend(call, skb);
return;

packet_loss_detected:
@@ -212,6 +209,13 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
trace_rxrpc_tx_rotate(call, seq, to);
trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate);

+ if (call->acks_lowest_nak == call->tx_bottom) {
+ call->acks_lowest_nak = to;
+ } else if (after(to, call->acks_lowest_nak)) {
+ summary->new_low_snack = true;
+ call->acks_lowest_nak = to;
+ }
+
/* We may have a left over fully-consumed buffer at the front that we
* couldn't drop before (rotate_and_keep below).
*/
@@ -231,6 +235,25 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
}
+
+ if (ix == tq->nr_reported_acks) {
+ /* Packet directly hard ACK'd. */
+ tq->nr_reported_acks++;
+ summary->nr_new_hacks++;
+ __set_bit(ix, &tq->segment_acked);
+ trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack);
+ } else if (test_bit(ix, &tq->segment_acked)) {
+ /* Soft ACK -> hard ACK. */
+ call->acks_nr_sacks--;
+ trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_sack);
+ } else {
+ /* Soft NAK -> hard ACK. */
+ call->acks_nr_snacks--;
+ summary->nr_new_hacks++;
+ __set_bit(ix, &tq->segment_acked);
+ trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak);
+ }
+
rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated);
tq->bufs[ix] = NULL;

@@ -268,13 +291,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,

_debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last);

- if (call->acks_lowest_nak == call->tx_bottom) {
- call->acks_lowest_nak = to;
- } else if (after(to, call->acks_lowest_nak)) {
- summary->new_low_snack = true;
- call->acks_lowest_nak = to;
- }
-
wake_up(&call->waitq);
return rot_last;
}
@@ -293,11 +309,6 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
call->resend_at = KTIME_MAX;
trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);

- if (unlikely(call->cong_last_nack)) {
- rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
- call->cong_last_nack = NULL;
- }
-
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -770,40 +781,92 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
wake_up(&call->waitq);
}

+#if defined(CONFIG_X86) && __GNUC__ && !defined(__clang__)
+/* Clang doesn't support the %z constraint modifier */
+#define shiftr_adv_rotr(shift_from, rotate_into) ({ \
+ asm(" shr%z1 %1\n" \
+ " inc %0\n" \
+ " rcr%z2 %2\n" \
+ : "+d"(shift_from), "+m"(*shift_from), "+rm"(rotate_into) \
+ ); \
+ })
+#else
+#define shiftr_adv_rotr(shift_from, rotate_into) ({ \
+ typeof(rotate_into) __bit0 = *shift_from & 1; \
+ *shift_from >>= 1; \
+ shift_from++; \
+ rotate_into >>= 1; \
+ rotate_into |= __bit0 << (sizeof(rotate_into) * 8 - 1); \
+ })
+#endif
+
/*
- * Determine how many nacks from the previous ACK have now been satisfied.
+ * Process a batch of soft ACKs specific to a transmission queue segment.
*/
-static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
- struct rxrpc_ack_summary *summary,
- rxrpc_seq_t hard_ack)
+static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct rxrpc_txqueue *tq,
+ unsigned long extracted_acks,
+ int nr_reported,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t *lowest_nak)
{
- struct sk_buff *skb = call->cong_last_nack;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int i, new_acks = 0, retained_nacks = 0;
- rxrpc_seq_t seq = hard_ack + 1, old_seq = sp->ack.first_ack;
- u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
+ unsigned long old_reported, flipped, new_acks, a_to_n, n_to_a;
+ int new, a, n;
+
+ old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks);
+ _enter("{%x,%lx,%d},%lx,%d,%x",
+ tq->qbase, tq->segment_acked, tq->nr_reported_acks,
+ extracted_acks, nr_reported, seq);
+
+ _debug("[%x]", tq->qbase);
+ _debug("tq %16lx %u", tq->segment_acked, tq->nr_reported_acks);
+ _debug("sack %16lx %u", extracted_acks, nr_reported);
+
+ /* See how many previously logged ACKs/NAKs have flipped. */
+ flipped = (tq->segment_acked ^ extracted_acks) & old_reported;
+ if (flipped) {
+ n_to_a = ~tq->segment_acked & flipped; /* Old NAK -> ACK */
+ a_to_n = tq->segment_acked & flipped; /* Old ACK -> NAK */
+ a = hweight_long(n_to_a);
+ n = hweight_long(a_to_n);
+ _debug("flip %16lx", flipped);
+ _debug("ntoa %16lx %d", n_to_a, a);
+ _debug("aton %16lx %d", a_to_n, n);
+ call->acks_nr_sacks += a - n;
+ call->acks_nr_snacks += n - a;
+ summary->nr_new_sacks += a;
+ summary->nr_new_snacks += n;
+ }

- if (after_eq(seq, old_seq + sp->ack.nr_acks)) {
- summary->nr_new_sacks += sp->ack.nr_nacks;
- summary->nr_new_sacks += seq - (old_seq + sp->ack.nr_acks);
- summary->nr_retained_snacks = 0;
- } else if (seq == old_seq) {
- summary->nr_retained_snacks = sp->ack.nr_nacks;
- } else {
- for (i = 0; i < sp->ack.nr_acks; i++) {
- if (acks[i] == RXRPC_ACK_TYPE_NACK) {
- if (before(old_seq + i, seq))
- new_acks++;
- else
- retained_nacks++;
- }
+ /* See how many new ACKs/NAKs have been acquired. */
+ new = nr_reported - tq->nr_reported_acks;
+ if (new > 0) {
+ new_acks = extracted_acks & ~old_reported;
+ if (new_acks) {
+ a = hweight_long(new_acks);
+ n = new - a;
+ _debug("new_a %16lx new=%d a=%d n=%d", new_acks, new, a, n);
+ call->acks_nr_sacks += a;
+ call->acks_nr_snacks += n;
+ summary->nr_new_sacks += a;
+ summary->nr_new_snacks += n;
+ } else {
+ call->acks_nr_snacks += new;
+ summary->nr_new_snacks += new;
}
-
- summary->nr_new_sacks += new_acks;
- summary->nr_retained_snacks = retained_nacks;
}

- return old_seq + sp->ack.nr_acks - 1;
+ tq->nr_reported_acks = nr_reported;
+ tq->segment_acked = extracted_acks;
+ trace_rxrpc_apply_acks(call, tq);
+
+ if (extracted_acks != ~0UL) {
+ rxrpc_seq_t lowest = seq + ffz(extracted_acks);
+
+ if (before(lowest, *lowest_nak))
+ *lowest_nak = lowest;
+ }
}

/*
@@ -817,39 +880,50 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
*/
static void rxrpc_input_soft_acks(struct rxrpc_call *call,
struct rxrpc_ack_summary *summary,
- struct sk_buff *skb,
- rxrpc_seq_t since)
+ struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int i, old_nacks = 0;
- rxrpc_seq_t lowest_nak = call->acks_hard_ack + sp->ack.nr_acks + 1;
- rxrpc_seq_t seq = call->acks_hard_ack;
+ struct rxrpc_txqueue *tq = call->tx_queue;
+ unsigned long extracted = ~0UL;
+ unsigned int nr = 0;
+ rxrpc_seq_t seq = call->acks_hard_ack + 1;
+ rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks;
u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);

- for (i = 0; i < sp->ack.nr_acks; i++) {
- seq++;
- if (acks[i] == RXRPC_ACK_TYPE_ACK) {
- call->acks_nr_sacks++;
- if (after(seq, since))
- summary->nr_new_sacks++;
- } else {
- summary->saw_snacks = true;
- if (before_eq(seq, since)) {
- /* Overlap with previous ACK */
- old_nacks++;
- } else {
- summary->nr_new_snacks++;
- sp->ack.nr_nacks++;
- }
+ _enter("%x,%x,%u", tq->qbase, seq, sp->ack.nr_acks);
+
+ while (after(seq, tq->qbase + RXRPC_NR_TXQUEUE - 1))
+ tq = tq->next;

- if (before(seq, lowest_nak))
- lowest_nak = seq;
+ for (unsigned int i = 0; i < sp->ack.nr_acks; i++) {
+ /* Decant ACKs until we hit a txqueue boundary. */
+ shiftr_adv_rotr(acks, extracted);
+ if (i == 256) {
+ acks -= i;
+ i = 0;
}
+ seq++;
+ nr++;
+ if ((seq & RXRPC_TXQ_MASK) != 0)
+ continue;
+
+ _debug("bound %16lx %u", extracted, nr);
+
+ rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE,
+ seq - RXRPC_NR_TXQUEUE, &lowest_nak);
+ extracted = ~0UL;
+ nr = 0;
+ tq = tq->next;
+ prefetch(tq);
}

- if (lowest_nak != call->acks_lowest_nak) {
- call->acks_lowest_nak = lowest_nak;
- summary->new_low_snack = true;
+ if (nr) {
+ unsigned int nr_reported = seq & RXRPC_TXQ_MASK;
+
+ extracted >>= RXRPC_NR_TXQUEUE - nr_reported;
+ _debug("tail %16lx %u", extracted, nr_reported);
+ rxrpc_input_soft_ack_tq(call, summary, tq, extracted, nr_reported,
+ seq & ~RXRPC_TXQ_MASK, &lowest_nak);
}

/* We *can* have more nacks than we did - the peer is permitted to drop
@@ -857,9 +931,14 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
* possible for the nack distribution to change whilst the number of
* nacks stays the same or goes down.
*/
- if (old_nacks < summary->nr_retained_snacks)
- summary->nr_new_sacks += summary->nr_retained_snacks - old_nacks;
- summary->nr_retained_snacks = old_nacks;
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_snack = true;
+ }
+
+ _debug("summary A=%d+%d N=%d+%d",
+ call->acks_nr_sacks, summary->nr_new_sacks,
+ call->acks_nr_snacks, summary->nr_new_snacks);
}

/*
@@ -902,7 +981,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_acktrailer trailer;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;

_enter("");
@@ -920,6 +999,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)

trace_rxrpc_rx_ack(call, sp);
rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
+ prefetch(call->tx_queue);

if (acked_serial != 0) {
switch (summary.ack_reason) {
@@ -980,16 +1060,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0)
skb_condense(skb);

- if (call->cong_last_nack) {
- since = rxrpc_input_check_prev_ack(call, &summary, hard_ack);
- rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
- call->cong_last_nack = NULL;
- } else {
- summary.nr_new_sacks = hard_ack - call->acks_hard_ack;
- call->acks_lowest_nak = hard_ack + nr_acks;
- since = hard_ack;
- }
-
call->acks_latest_ts = skb->tstamp;
call->acks_hard_ack = hard_ack;
call->acks_prev_seq = prev_pkt;
@@ -1037,9 +1107,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, &summary, skb, since);
- rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
- call->cong_last_nack = skb;
+ rxrpc_input_soft_acks(call, &summary, skb);
}

if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
@@ -1049,6 +1117,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_ping_for_lost_reply);

rxrpc_congestion_management(call, skb, &summary, acked_serial);
+ if (summary.need_retransmit)
+ rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE);

send_response:
if (summary.ack_reason == RXRPC_ACK_PING)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 57a5bb7034e4..5387bf9b0015 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -460,7 +460,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
len += sizeof(*jumbo);
}

- trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false);
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags | flags, false);
kv->iov_len = len;
return len;
}
@@ -521,6 +521,13 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se
}

/* Set timeouts */
+ if (call->peer->rtt_count > 1) {
+ ktime_t delay = rxrpc_get_rto_backoff(call->peer, false);
+
+ call->ack_lost_at = ktime_add(req->now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack);
+ }
+
if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));

@@ -594,6 +601,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req
ret = 0;
trace_rxrpc_tx_data(call, txb->seq, txb->serial,
txb->flags, true);
+ conn->peer->last_tx_at = ktime_get_seconds();
goto done;
}
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index fcd111709b50..e602dde0189c 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -297,6 +297,9 @@ static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call)
kfree(tq);
return -ENOMEM;
} else {
+ /* We start at seq 1, so pretend seq 0 is hard-acked. */
+ tq->nr_reported_acks = 1;
+ tq->segment_acked = 1UL;
tq->qbase = 0;
call->tx_qbase = 0;
call->send_queue = tq;