From: Mat Martineau <mathew.j.martineau@linux.intel.com>
To: netdev@vger.kernel.org
Cc: Florian Westphal <fw@strlen.de>,
kuba@kernel.org, mptcp@lists.01.org,
Paolo Abeni <pabeni@redhat.com>,
Mat Martineau <mathew.j.martineau@linux.intel.com>
Subject: [PATCH net-next 09/10] mptcp: track window announced to peer
Date: Thu, 19 Nov 2020 11:46:02 -0800 [thread overview]
Message-ID: <20201119194603.103158-10-mathew.j.martineau@linux.intel.com> (raw)
In-Reply-To: <20201119194603.103158-1-mathew.j.martineau@linux.intel.com>
From: Florian Westphal <fw@strlen.de>
OoO handling attempts to detect when packet is out-of-window by testing
current ack sequence and remaining space vs. sequence number.
This doesn't work reliably. Store the highest allowed sequence number
that we've announced and use it to detect oow packets.
Do this when mptcp options get written to the packet (wire format).
For this to work we need to move the write_options call until after
stack selected a new tcp window.
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/net/mptcp.h | 3 ++-
net/ipv4/tcp_output.c | 11 +++++++----
net/mptcp/options.c | 22 +++++++++++++++++++++-
net/mptcp/protocol.c | 12 +++++++-----
net/mptcp/protocol.h | 1 +
5 files changed, 38 insertions(+), 11 deletions(-)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6e706d838e4e..b6cf07143a8a 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -88,7 +88,8 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts);
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
-void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
+void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+ struct mptcp_out_options *opts);
/* move the skb extension owership, with the assumption that 'to' is
* newly allocated
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 99905bc01d40..41880d3521ed 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -445,11 +445,12 @@ struct tcp_out_options {
struct mptcp_out_options mptcp;
};
-static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
+static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
+ struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options))
- mptcp_write_options(ptr, &opts->mptcp);
+ mptcp_write_options(ptr, tp, &opts->mptcp);
#endif
}
@@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
smc_options_write(ptr, &options);
- mptcp_options_write(ptr, opts);
+ mptcp_options_write(ptr, tp, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -1346,7 +1347,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
th->window = htons(tcp_select_window(sk));
@@ -1357,6 +1357,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
*/
th->window = htons(min(tp->rcv_wnd, 65535U));
}
+
+ tcp_options_write((__be32 *)(th + 1), tp, &opts);
+
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index d7afffcc87c1..248e3930c0cb 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1010,7 +1010,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
}
-void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
+static void mptcp_set_rwin(const struct tcp_sock *tp)
+{
+ const struct sock *ssk = (const struct sock *)tp;
+ const struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+ u64 ack_seq;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+
+ ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
+
+ if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
+ WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+}
+
+void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+ struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
@@ -1167,4 +1184,7 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
}
+
+ if (tp)
+ mptcp_set_rwin(tp);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c2629190b1ce..4ae2c4a30e44 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -168,19 +168,19 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
struct rb_node **p, *parent;
u64 seq, end_seq, max_seq;
struct sk_buff *skb1;
- int space;
seq = MPTCP_SKB_CB(skb)->map_seq;
end_seq = MPTCP_SKB_CB(skb)->end_seq;
- space = tcp_space(sk);
- max_seq = space > 0 ? space + msk->ack_seq : msk->ack_seq;
+ max_seq = READ_ONCE(msk->rcv_wnd_sent);
pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
RB_EMPTY_ROOT(&msk->out_of_order_queue));
- if (after64(seq, max_seq)) {
+ if (after64(end_seq, max_seq)) {
/* out of window */
mptcp_drop(sk, skb);
- pr_debug("oow by %ld", (unsigned long)seq - (unsigned long)max_seq);
+ pr_debug("oow by %lld, rcv_wnd_sent %llu\n",
+ (unsigned long long)end_seq - (unsigned long)max_seq,
+ (unsigned long long)msk->rcv_wnd_sent);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
return;
}
@@ -2294,6 +2294,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
WRITE_ONCE(msk->ack_seq, ack_seq);
+ WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
sock_reset_flag(nsk, SOCK_RCU_FREE);
@@ -2586,6 +2587,7 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->ack_seq, ack_seq);
+ WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
atomic64_set(&msk->snd_una, msk->write_seq);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 4d6dd4adaaaf..67f86818203f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -216,6 +216,7 @@ struct mptcp_sock {
u64 write_seq;
u64 snd_nxt;
u64 ack_seq;
+ u64 rcv_wnd_sent;
u64 rcv_data_fin_seq;
struct sock *last_snd;
int snd_burst;
--
2.29.2
next prev parent reply other threads:[~2020-11-19 19:46 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-19 19:45 [PATCH net-next 00/10] mptcp: More miscellaneous MPTCP fixes Mat Martineau
2020-11-19 19:45 ` [PATCH net-next 01/10] mptcp: drop WORKER_RUNNING status bit Mat Martineau
2020-11-19 19:45 ` [PATCH net-next 02/10] mptcp: fix state tracking for fallback socket Mat Martineau
2020-11-19 19:45 ` [PATCH net-next 03/10] mptcp: skip to next candidate if subflow has unacked data Mat Martineau
2020-11-19 19:45 ` [PATCH net-next 04/10] selftests: mptcp: add link failure test case Mat Martineau
2020-11-19 19:45 ` [PATCH net-next 05/10] mptcp: keep unaccepted MPC subflow into join list Mat Martineau
2020-11-19 19:45 ` [PATCH net-next 06/10] mptcp: change add_addr_signal type Mat Martineau
2020-11-19 19:46 ` [PATCH net-next 07/10] mptcp: send out dedicated ADD_ADDR packet Mat Martineau
2020-11-19 19:46 ` [PATCH net-next 08/10] selftests: mptcp: add ADD_ADDR IPv6 test cases Mat Martineau
2020-11-19 19:46 ` Mat Martineau [this message]
2020-11-19 19:46 ` [PATCH net-next 10/10] mptcp: refine MPTCP-level ack scheduling Mat Martineau
2020-11-23 11:57 ` Eric Dumazet
2020-11-23 14:21 ` Paolo Abeni
2020-11-20 23:35 ` [PATCH net-next 00/10] mptcp: More miscellaneous MPTCP fixes Jakub Kicinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201119194603.103158-10-mathew.j.martineau@linux.intel.com \
--to=mathew.j.martineau@linux.intel.com \
--cc=fw@strlen.de \
--cc=kuba@kernel.org \
--cc=mptcp@lists.01.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).