From: David Howells <dhowells@redhat.com>
To: netdev@vger.kernel.org
Cc: David Howells <dhowells@redhat.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Marc Dionne <marc.dionne@auristor.com>,
linux-afs@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: [PATCH net-next 08/13] rxrpc: Simplify ACK handling
Date: Tue, 31 Jan 2023 17:12:22 +0000 [thread overview]
Message-ID: <20230131171227.3912130-9-dhowells@redhat.com> (raw)
In-Reply-To: <20230131171227.3912130-1-dhowells@redhat.com>
Now that general ACK transmission is done from the same thread as incoming
DATA packet wrangling, there's no possibility that the SACK table will be
being updated by the latter whilst the former is trying to copy it to an
ACK.
This means that we can safely rotate the SACK table whilst updating it
without having to take a lock, rather than keeping all the bits inside it
in fixed place and copying and then rotating it in the transmitter.
Therefore, simplify SACK handing by keeping track of starting point in the
ring and rotate slots down as we consume them.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
include/trace/events/rxrpc.h | 36 ++++++++++++++++++++++++++++
net/rxrpc/ar-internal.h | 1 +
net/rxrpc/input.c | 46 ++++++++++++++++++------------------
net/rxrpc/output.c | 46 ++++++++++--------------------------
4 files changed, 73 insertions(+), 56 deletions(-)
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index e51a84f349d8..b6adec9111e1 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -422,6 +422,13 @@
EM(RXRPC_ACK_IDLE, "IDL") \
E_(RXRPC_ACK__INVALID, "-?-")
+#define rxrpc_sack_traces \
+ EM(rxrpc_sack_advance, "ADV") \
+ EM(rxrpc_sack_fill, "FIL") \
+ EM(rxrpc_sack_nack, "NAK") \
+ EM(rxrpc_sack_none, "---") \
+ E_(rxrpc_sack_oos, "OOS")
+
#define rxrpc_completions \
EM(RXRPC_CALL_SUCCEEDED, "Succeeded") \
EM(RXRPC_CALL_REMOTELY_ABORTED, "RemoteAbort") \
@@ -497,6 +504,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte);
enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte);
enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte);
enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
+enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte);
enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte);
enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte);
@@ -531,6 +539,7 @@ rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;
rxrpc_rtt_rx_traces;
rxrpc_rtt_tx_traces;
+rxrpc_sack_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
rxrpc_tx_points;
@@ -1929,6 +1938,33 @@ TRACE_EVENT(rxrpc_call_poked,
__entry->call_debug_id)
);
+TRACE_EVENT(rxrpc_sack,
+ TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+ unsigned int sack, enum rxrpc_sack_trace what),
+
+ TP_ARGS(call, seq, sack, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_seq_t, seq)
+ __field(unsigned int, sack)
+ __field(enum rxrpc_sack_trace, what)
+ ),
+
+ TP_fast_assign(
+ __entry->call_debug_id = call->debug_id;
+ __entry->seq = seq;
+ __entry->sack = sack;
+ __entry->what = what;
+ ),
+
+ TP_printk("c=%08x q=%08x %s k=%x",
+ __entry->call_debug_id,
+ __entry->seq,
+ __print_symbolic(__entry->what, rxrpc_sack_traces),
+ __entry->sack)
+ );
+
#undef EM
#undef E_
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2ca99688f7f0..2b1d0d3ca064 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -691,6 +691,7 @@ struct rxrpc_call {
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_sack_base; /* Starting slot in SACK table ring */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_seq_t ackr_window; /* Base of SACK window */
rxrpc_seq_t ackr_wtop; /* Base of SACK window */
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 7e65c7d5bff0..d68848fce51f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -368,6 +368,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
rxrpc_serial_t serial = sp->hdr.serial;
+ unsigned int sack = call->ackr_sack_base;
rxrpc_seq_t window = call->ackr_window;
rxrpc_seq_t wtop = call->ackr_wtop;
rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
@@ -410,9 +411,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Queue the packet. */
if (seq == window) {
- rxrpc_seq_t reset_from;
- bool reset_sack = false;
-
if (sp->hdr.flags & RXRPC_REQUEST_ACK)
ack_reason = RXRPC_ACK_REQUESTED;
/* Send an immediate ACK if we fill in a hole */
@@ -422,8 +420,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
call->ackr_nr_unacked++;
window++;
- if (after(window, wtop))
+ if (after(window, wtop)) {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none);
wtop = window;
+ } else {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+ }
+
rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
@@ -440,43 +444,39 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
__skb_unlink(oos, &call->rx_oos_queue);
last = osp->hdr.flags & RXRPC_LAST_PACKET;
seq = osp->hdr.seq;
- if (!reset_sack) {
- reset_from = seq;
- reset_sack = true;
- }
+ call->ackr_sack_table[sack] = 0;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
window++;
rxrpc_input_queue_data(call, oos, window, wtop,
- rxrpc_receive_queue_oos);
+ rxrpc_receive_queue_oos);
}
spin_unlock(&call->recvmsg_queue.lock);
- if (reset_sack) {
- do {
- call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0;
- } while (reset_from++, before(reset_from, window));
- }
+ call->ackr_sack_base = sack;
} else {
- bool keep = false;
+ unsigned int slot;
ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
- if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) {
- call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1;
- keep = 1;
+ slot = seq - window;
+ sack = (sack + slot) % RXRPC_SACK_SIZE;
+
+ if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
}
+ call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos);
+
if (after(seq + 1, wtop)) {
wtop = seq + 1;
rxrpc_input_update_ack_window(call, window, wtop);
}
- if (!keep) {
- ack_reason = RXRPC_ACK_DUPLICATE;
- goto send_ack;
- }
-
skb_queue_walk(&call->rx_oos_queue, oos) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index b6bd5e6ccb4c..c69c31470fa8 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -83,56 +83,36 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_txbuf *txb)
{
struct rxrpc_ackinfo ackinfo;
- unsigned int qsize;
- rxrpc_seq_t window, wtop, wrap_point, ix, first;
+ unsigned int qsize, sack, wrap, to;
+ rxrpc_seq_t window, wtop;
int rsize;
u32 mtu, jmax;
u8 *ackp = txb->acks;
- u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8);
call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
- /* Barrier against rxrpc_input_data(). */
-retry:
window = call->ackr_window;
wtop = call->ackr_wtop;
+ sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = 0;
+ txb->ack.nAcks = wtop - window;
if (after(wtop, window)) {
- /* Try to copy the SACK ring locklessly. We can use the copy,
- * only if the now-current top of the window didn't go past the
- * previously read base - otherwise we can't know whether we
- * have old data or new data.
- */
- memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer));
- wrap_point = window + RXRPC_SACK_SIZE - 1;
- window = call->ackr_window;
- wtop = call->ackr_wtop;
- if (after(wtop, wrap_point)) {
- cond_resched();
- goto retry;
- }
-
- /* The buffer is maintained as a ring with an invariant mapping
- * between bit position and sequence number, so we'll probably
- * need to rotate it.
- */
- txb->ack.nAcks = wtop - window;
- ix = window % RXRPC_SACK_SIZE;
- first = sizeof(sack_buffer) - ix;
+ wrap = RXRPC_SACK_SIZE - sack;
+ to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
- if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks);
+ if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
} else {
- memcpy(txb->acks, sack_buffer + ix, first);
- memcpy(txb->acks + first, sack_buffer,
- txb->ack.nAcks - first);
+ memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
+ memcpy(txb->acks + wrap, call->ackr_sack_table,
+ to - wrap);
}
- ackp += txb->ack.nAcks;
+ ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
} else if (txb->ack.reason == RXRPC_ACK_DELAY) {
next prev parent reply other threads:[~2023-01-31 17:14 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-31 17:12 [PATCH net-next 00/13] rxrpc: Increasing SACK size and moving away from softirq, part 5 David Howells
2023-01-31 17:12 ` [PATCH net-next 01/13] rxrpc: Fix trace string David Howells
2023-01-31 17:12 ` [PATCH net-next 02/13] rxrpc: Remove whitespace before ')' in trace header David Howells
2023-01-31 17:12 ` [PATCH net-next 03/13] rxrpc: Shrink the tabulation in the rxrpc trace header a bit David Howells
2023-01-31 17:12 ` [PATCH net-next 04/13] rxrpc: Convert call->recvmsg_lock to a spinlock David Howells
2023-01-31 17:12 ` [PATCH net-next 05/13] rxrpc: Allow a delay to be injected into packet reception David Howells
2023-01-31 17:12 ` [PATCH net-next 06/13] rxrpc: Generate extra pings for RTT during heavy-receive call David Howells
2023-01-31 17:12 ` [PATCH net-next 07/13] rxrpc: De-atomic call->ackr_window and call->ackr_nr_unacked David Howells
2023-01-31 17:12 ` David Howells [this message]
2023-01-31 17:12 ` [PATCH net-next 09/13] rxrpc: Don't lock call->tx_lock to access call->tx_buffer David Howells
2023-01-31 17:12 ` [PATCH net-next 10/13] rxrpc: Remove local->defrag_sem David Howells
2023-01-31 17:12 ` [PATCH net-next 11/13] rxrpc: Show consumed and freed packets as non-dropped in dropwatch David Howells
2023-02-02 10:42 ` Paolo Abeni
2023-01-31 17:12 ` [PATCH net-next 12/13] rxrpc: Change rx_packet tracepoint to display securityIndex not type twice David Howells
2023-01-31 17:12 ` [PATCH net-next 13/13] rxrpc: Kill service bundle David Howells
2023-02-02 12:10 ` [PATCH net-next 00/13] rxrpc: Increasing SACK size and moving away from softirq, part 5 patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230131171227.3912130-9-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=linux-afs@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=marc.dionne@auristor.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox