* [RFC mptcp-next 1/6] mptcp: add MP_FAIL echo support
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
@ 2022-02-16 11:05 ` Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 2/6] mptcp: add mibs for MP_FAIL echo Geliang Tang
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Geliang Tang @ 2022-02-16 11:05 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
Add MP_FAIL echo support.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/options.c | 22 +++++++++++++++++++---
net/mptcp/pm.c | 36 +++++++++++++++++++++++++++++++++++-
net/mptcp/protocol.h | 3 +++
net/mptcp/subflow.c | 5 ++---
4 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 88f4ebbd6515..c4d66fca9c5d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -806,7 +806,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk,
*size = TCPOLEN_MPTCP_FAIL;
opts->suboptions |= OPTION_MPTCP_FAIL;
- opts->fail_seq = subflow->map_seq;
+ opts->fail_seq = subflow->fail_seq;
pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
@@ -1092,6 +1092,18 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac;
}
+static bool mptcp_is_mp_fail_echo(struct mptcp_sock *msk, u64 fail_seq)
+{
+ struct mptcp_subflow_context *subflow;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (subflow->fail_seq == fail_seq)
+ return true;
+ }
+
+ return false;
+}
+
/* Return false if a subflow has been reset, else return true */
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{
@@ -1154,8 +1166,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
if (mp_opt.suboptions & OPTION_MPTCP_FAIL) {
- mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
+ if (!mptcp_is_mp_fail_echo(msk, mp_opt.fail_seq)) {
+ mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
+ } else {
+ mptcp_pm_mp_fail_echoed(sk, mp_opt.fail_seq);
+ }
}
if (mp_opt.suboptions & OPTION_MPTCP_RST) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d0d31d5c198a..d58fddf4d5a2 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -279,8 +279,42 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
pr_debug("fail_seq=%llu", fail_seq);
- if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback))
+ if (mptcp_has_another_subflow(sk)) {
+ struct mptcp_subflow_context *tmp;
+
+ mptcp_for_each_subflow(msk, tmp) {
+ if (tmp != subflow) {
+ tmp->fail_seq = fail_seq;
+ tmp->send_mp_fail = 1;
+ }
+ }
+ } else if (READ_ONCE(msk->allow_infinite_fallback)) {
+ subflow->fail_seq = fail_seq;
+ subflow->send_mp_fail = 1;
subflow->send_infinite_map = 1;
+ }
+}
+
+void mptcp_pm_mp_fail_echoed(struct sock *sk, u64 fail_seq)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ pr_debug("echoed fail_seq=%llu", fail_seq);
+
+ if (mptcp_has_another_subflow(sk)) {
+ struct mptcp_subflow_context *tmp;
+
+ mptcp_for_each_subflow(msk, tmp) {
+ if (tmp != subflow && tmp->fail_seq == fail_seq) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(tmp);
+
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow_sched_work_if_closed(msk, ssk);
+ }
+ }
+ }
}
/* path manager helpers */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 18ca0248c084..febf32c9a139 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -433,6 +433,7 @@ struct mptcp_subflow_context {
u32 map_data_len;
__wsum map_data_csum;
u32 map_csum_len;
+ u64 fail_seq;
u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */
request_bkup : 1,
@@ -604,6 +605,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
+void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk);
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
@@ -759,6 +761,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
+void mptcp_pm_mp_fail_echoed(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8d086641bdc5..a8b5b8bf45e5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -910,6 +910,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ subflow->fail_seq = subflow->map_seq;
subflow->send_mp_fail = 1;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
@@ -1083,7 +1084,7 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
}
/* sched mptcp worker to remove the subflow if no more data is pending */
-static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
@@ -1165,8 +1166,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
if (subflow->send_mp_fail) {
if (mptcp_has_another_subflow(ssk) ||
!READ_ONCE(msk->allow_infinite_fallback)) {
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
tcp_send_active_reset(ssk, GFP_ATOMIC);
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC mptcp-next 2/6] mptcp: add mibs for MP_FAIL echo
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 1/6] mptcp: add MP_FAIL echo support Geliang Tang
@ 2022-02-16 11:05 ` Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 3/6] selftests: mptcp: add MP_FAIL echo mibs check Geliang Tang
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Geliang Tang @ 2022-02-16 11:05 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
Add MP_FAIL echo support.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/mib.c | 2 ++
net/mptcp/mib.h | 2 ++
net/mptcp/options.c | 1 +
net/mptcp/pm.c | 2 ++
4 files changed, 7 insertions(+)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index d93a8c9996fd..f3070daa013b 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -49,6 +49,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
+ SNMP_MIB_ITEM("MPFailEchoTx", MPTCP_MIB_MPFAILECHOTX),
+ SNMP_MIB_ITEM("MPFailEchoRx", MPTCP_MIB_MPFAILECHORX),
SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX),
SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX),
SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 529d07af9e14..83219721d337 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -42,6 +42,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
+ MPTCP_MIB_MPFAILECHOTX, /* Transmit a MP_FAIL echo */
+ MPTCP_MIB_MPFAILECHORX, /* Received a MP_FAIL echo */
MPTCP_MIB_MPFASTCLOSETX, /* Transmit a MP_FASTCLOSE */
MPTCP_MIB_MPFASTCLOSERX, /* Received a MP_FASTCLOSE */
MPTCP_MIB_MPRSTTX, /* Transmit a MP_RST */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c4d66fca9c5d..67dad62f34f6 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1171,6 +1171,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
} else {
mptcp_pm_mp_fail_echoed(sk, mp_opt.fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILECHORX);
}
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d58fddf4d5a2..5e133b249492 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -286,12 +286,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
if (tmp != subflow) {
tmp->fail_seq = fail_seq;
tmp->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILECHOTX);
}
}
} else if (READ_ONCE(msk->allow_infinite_fallback)) {
subflow->fail_seq = fail_seq;
subflow->send_mp_fail = 1;
subflow->send_infinite_map = 1;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILECHOTX);
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC mptcp-next 3/6] selftests: mptcp: add MP_FAIL echo mibs check
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 1/6] mptcp: add MP_FAIL echo support Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 2/6] mptcp: add mibs for MP_FAIL echo Geliang Tang
@ 2022-02-16 11:05 ` Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 4/6] mptcp: add a new sysctl mp_fail_timeout Geliang Tang
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Geliang Tang @ 2022-02-16 11:05 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch added the MP_FAIL echo mibs check.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../testing/selftests/net/mptcp/mptcp_join.sh | 24 +++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 37b3d00b2dcb..01b9cc190134 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -871,6 +871,8 @@ chk_fail_nr()
{
local fail_tx=$1
local fail_rx=$2
+ local echo_tx=${3:-$fail_tx}
+ local echo_rx=${4:-$fail_rx}
local count
local dump_stats
@@ -896,6 +898,28 @@ chk_fail_nr()
echo "[ ok ]"
fi
+ printf "%-${nr_blank}s %s" " " "etx"
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailEchoTx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$echo_tx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $echo_tx"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - echorx"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailEchoRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$echo_rx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $echo_rx"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
[ "${dump_stats}" = 1 ] && dump_stats
}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC mptcp-next 4/6] mptcp: add a new sysctl mp_fail_timeout
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
` (2 preceding siblings ...)
2022-02-16 11:05 ` [RFC mptcp-next 3/6] selftests: mptcp: add MP_FAIL echo mibs check Geliang Tang
@ 2022-02-16 11:05 ` Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 5/6] mptcp: add MP_FAIL retrans support Geliang Tang
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Geliang Tang @ 2022-02-16 11:05 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch added a new sysctl, named mp_fail_timeout, to control the
timeout value (in seconds) of the MP_FAIL retransmission.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
Documentation/networking/mptcp-sysctl.rst | 10 ++++++++++
net/mptcp/ctrl.c | 14 ++++++++++++++
net/mptcp/protocol.h | 1 +
3 files changed, 25 insertions(+)
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index e263dfcc4b40..3ad19e04ecce 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -75,3 +75,13 @@ stale_loss_cnt - INTEGER
This is a per-namespace sysctl.
Default: 4
+
+mp_fail_timeout - INTEGER (seconds)
+ Set the timeout after which a MP_FAIL control message will be
+ resent to an MPTCP peer that has not acknowledged a previous
+ MP_FAIL message.
+
+ The default value matches TCP_RTO_MAX. This is a per-namespace
+ sysctl.
+
+ Default: 120
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..a211af9b19e8 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
u8 pm_type;
+ unsigned int mp_fail_timeout;
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
return mptcp_get_pernet(net)->pm_type;
}
+unsigned int mptcp_get_mp_fail_timeout(const struct net *net)
+{
+ return mptcp_get_pernet(net)->mp_fail_timeout;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+ pernet->mp_fail_timeout = TCP_RTO_MAX;
}
#ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &mptcp_pm_type_max
},
+ {
+ .procname = "mp_fail_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{}
};
@@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
+ table[6].data = &pernet->mp_fail_timeout;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index febf32c9a139..595222b08fbd 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -583,6 +583,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
+unsigned int mptcp_get_mp_fail_timeout(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC mptcp-next 5/6] mptcp: add MP_FAIL retrans support
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
` (3 preceding siblings ...)
2022-02-16 11:05 ` [RFC mptcp-next 4/6] mptcp: add a new sysctl mp_fail_timeout Geliang Tang
@ 2022-02-16 11:05 ` Geliang Tang
2022-02-16 11:05 ` [RFC mptcp-next 6/6] selftests: mptcp: MP_FAIL timeout testcases TODO Geliang Tang
2022-02-17 1:21 ` [RFC mptcp-next 0/6] MP_FAIL echo and retrans Mat Martineau
6 siblings, 0 replies; 8+ messages in thread
From: Geliang Tang @ 2022-02-16 11:05 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
Add MP_FAIL retrans support.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/pm.c | 3 +++
net/mptcp/protocol.c | 1 +
net/mptcp/protocol.h | 3 +++
net/mptcp/subflow.c | 25 +++++++++++++++++++++++++
4 files changed, 32 insertions(+)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 5e133b249492..e85acdf49d95 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -311,11 +311,14 @@ void mptcp_pm_mp_fail_echoed(struct sock *sk, u64 fail_seq)
if (tmp != subflow && tmp->fail_seq == fail_seq) {
struct sock *ssk = mptcp_subflow_tcp_sock(tmp);
+ sk_stop_timer_sync(ssk, &tmp->mp_fail_timer);
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow_sched_work_if_closed(msk, ssk);
}
}
+ } else {
+ sk_stop_timer_sync(sk, &subflow->mp_fail_timer);
}
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4599bde215b2..85f7c76aeb90 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2307,6 +2307,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_orphan(ssk);
subflow->disposable = 1;
+ sk_stop_timer_sync(ssk, &subflow->mp_fail_timer);
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
* the ssk has been already destroyed, we just need to release the
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 595222b08fbd..9f6e8774b069 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -467,6 +467,7 @@ struct mptcp_subflow_context {
u8 reset_transient:1;
u8 reset_reason:4;
u8 stale_count;
+ u8 retrans_times;
long delegated_status;
@@ -483,6 +484,8 @@ struct mptcp_subflow_context {
void (*tcp_state_change)(struct sock *sk);
void (*tcp_error_report)(struct sock *sk);
+ struct timer_list mp_fail_timer;
+
struct rcu_head rcu;
};
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a8b5b8bf45e5..9bbc157fd713 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -27,6 +27,8 @@
#include <trace/events/mptcp.h>
+#define MP_FAIL_RETRANS_MAX 3
+
static void mptcp_subflow_ops_undo_override(struct sock *ssk);
static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
@@ -1099,9 +1101,29 @@ void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
}
}
+static void mptcp_mp_fail_timer(struct timer_list *timer)
+{
+ struct mptcp_subflow_context *subflow = from_timer(subflow, timer, mp_fail_timer);
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct net *net = sock_net(ssk);
+
+ if (inet_sk_state_load(ssk) == TCP_CLOSE)
+ return;
+
+ subflow->fail_seq = subflow->map_seq;
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ pr_debug("retransmit MP_FAIL %u fail_seq=%llu",
+ subflow->retrans_times++, subflow->fail_seq);
+
+ if (subflow->retrans_times < MP_FAIL_RETRANS_MAX)
+ sk_reset_timer(ssk, timer, jiffies + mptcp_get_mp_fail_timeout(net));
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct net *net = sock_net(ssk);
enum mapping_status status;
struct mptcp_sock *msk;
struct sk_buff *skb;
@@ -1173,6 +1195,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
sk_eat_skb(ssk, skb);
}
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ timer_setup(&subflow->mp_fail_timer, mptcp_mp_fail_timer, 0);
+ sk_reset_timer(ssk, &subflow->mp_fail_timer,
+ jiffies + mptcp_get_mp_fail_timeout(net));
return true;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC mptcp-next 6/6] selftests: mptcp: MP_FAIL timeout testcases TODO
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
` (4 preceding siblings ...)
2022-02-16 11:05 ` [RFC mptcp-next 5/6] mptcp: add MP_FAIL retrans support Geliang Tang
@ 2022-02-16 11:05 ` Geliang Tang
2022-02-17 1:21 ` [RFC mptcp-next 0/6] MP_FAIL echo and retrans Mat Martineau
6 siblings, 0 replies; 8+ messages in thread
From: Geliang Tang @ 2022-02-16 11:05 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
This patch added the MP_FAIL timeout testcases.
Dosen't work yet.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
.../testing/selftests/net/mptcp/mptcp_join.sh | 60 +++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 01b9cc190134..abb8bc3468ae 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -41,6 +41,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
6 0 0 65535,
6 0 0 0"
+# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x60) ||
+# (ip6 && (ip6[74] & 0xf0) == 0x60)'"
+CBPF_MPTCP_SUBOPTION_MP_FAIL="14,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 3 64,
+ 48 0 0 54,
+ 84 0 0 240,
+ 21 6 7 96,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 4 96,
+ 48 0 0 74,
+ 84 0 0 240,
+ 21 0 1 96,
+ 6 0 0 65535,
+ 6 0 0 0"
+
init_partial()
{
capout=$(mktemp)
@@ -261,6 +279,27 @@ reset_with_fail()
index 100 || exit 1
}
+reset_with_fail_timeout()
+{
+ local i="$1"
+ local ip="${2:-4}"
+ local tables
+
+ tables="iptables"
+ if [ $ip -eq 6 ]; then
+ tables="ip6tables"
+ fi
+
+ reset_with_fail $i $ip
+
+ ip netns exec $ns1 sysctl -q net.mptcp.mp_fail_timeout=1
+ ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+ -m tcp --tcp-option 30 \
+ -m bpf --bytecode \
+ "$CBPF_MPTCP_SUBOPTION_MP_FAIL" \
+ -j DROP
+}
+
print_file_err()
{
ls -l "$1" 1>&2
@@ -2455,6 +2494,27 @@ fail_tests()
1 \
0 \
1
+
+ # single subflow
+ reset_with_fail_timeout 1
+ run_tests $ns1 $ns2 10.0.1.1 128
+ chk_join_nr "MP_FAIL timeout 1: $(pedit_action_pkts) corrupted pkts" 0 0 0 \
+ +1 +0 \
+ 1 \
+ 0 \
+ 1
+
+ # timeout test
+ reset_with_fail_timeout 2
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate 20mbit delay 1
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 1024
+ chk_join_nr "MP_FAIL timeout 2: $(pedit_action_pkts) corrupted pkts" 1 1 1 \
+ +1 +0 \
+ 1 \
+ 1
}
all_tests()
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [RFC mptcp-next 0/6] MP_FAIL echo and retrans
2022-02-16 11:05 [RFC mptcp-next 0/6] MP_FAIL echo and retrans Geliang Tang
` (5 preceding siblings ...)
2022-02-16 11:05 ` [RFC mptcp-next 6/6] selftests: mptcp: MP_FAIL timeout testcases TODO Geliang Tang
@ 2022-02-17 1:21 ` Mat Martineau
6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2022-02-17 1:21 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
On Wed, 16 Feb 2022, Geliang Tang wrote:
> This patchset added MP_FAIL echo and retrans support. Except the last
> patch, others work well.
>
> Here I have some questions about it.
>
> 1. How to distinguish an MP_FAIL packet and an MP_FAIL echo packet?
>
> Unlike ADD_ADDR, there's no echo bit in a MP_FAIL packet. Patch 1 used
> simple method to distinguish them.
>
> MP_FAIL carries the data sequence number of the checksum failure.
The RFC says (after Figure 16):
"The receiver of this option MUST discard all data following the data
sequence number specified."
So MP_FAIL carries the sequence number of the last "good" byte received.
> MP_FAIL echo carries the received data sequence number of a MP_FAIL.
I'm assuming by "MP_FAIL echo" you're referring to the MP_FAIL response
mentioned in RFC 8684 section 3.7: "While in theory paths may only be
damaged in one direction -- and the MP_FAIL signal affects only one
direction of traffic -- for simplicity of implementation, the receiver of
an MP_FAIL MUST also respond with an MP_FAIL in the reverse direction and
entirely revert to a regular TCP session."
The MP_FAIL sent in response doesn't contain the received data sequence
number of an MP_FAIL, as I've read the RFC (if I missed something please
point me to it). As I read the sentence above, the MP_FAIL sent in
response just contains the sequence number of the last "good" byte
received *by this peer*. The difference is that this MP_FAIL was triggered
by a received MP_FAIL instead of a checksum failure.
> 2. For the multiple subflow case, MP_FAIL echo is sent on another subflow,
> not the subflow receiving this MP_FAIL. Since the receiving subflow will
> be closed by MP_RST after a while.
>
If there are multiple subflows, MP_FAIL is only sent with TCP RST. The
subflow is immediately disconnected and there is no need to send MP_FAIL
on any other subflows since they are still working properly and can handle
MPTCP-level retransmissions to recover the data stream.
A MP_FAIL sent in response to another MP_FAIL only happens in the "single
subflow with checksum error and contiguous data" special case.
> 3. Patch 5 added mp_fail_timer as a struct member of struct
> mptcp_subflow_context. Is there a better place to add this timer?
>
I'm not sure a separate timer is needed?
There's no need to retransmit MP_FAIL when there are multiple subflows, or
a single subflow with noncontiguous data.
In the remaining case (single subflow with contiguous data), the
connection is trying to fall back so there's no need to use
msk->timer_ival for MPTCP-level retransmits or DATA_FIN retransmits.
msk->timer_ival is available to trigger a check if we have not received a
response from the peer after sending MP_FAIL.
I don't think we should put a lot of work in to recovering from lost
MP_FAILs - if the peer doesn't respond properly it's an option to just
give up and reset the connection.
-Mat
> 4. Just like the way of dropping the ADD_ADDR packets in the ADD_ADDR
> timeout testcases, patch 6 tried to use iptables bpf to drop the MP_FAIL
> packets too. But it doesn't work.
>
> Drop ADD_ADDR packets using this bpf rule:
>
> "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
> (ip6 && (ip6[74] & 0xf0) == 0x30)'"
>
> I used a similar rule to drop MP_FAIL packets:
>
> "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x60) ||
> (ip6 && (ip6[74] & 0xf0) == 0x60)'"
>
> I thinks the offset shouldn't be 54 or 74 here. Matt, could you please
> give me some help about how to get the right offset values?
>
> Thanks.
> -Geliang
>
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/261
> Depends on: the "add mp_fail testcases" series.
>
> Geliang Tang (6):
> mptcp: add MP_FAIL echo support
> mptcp: add mibs for MP_FAIL echo
> selftests: mptcp: add MP_FAIL echo mibs check
> mptcp: add a new sysctl mp_fail_timeout
> mptcp: add MP_FAIL retrans support
> selftests: mptcp: MP_FAIL timeout testcases TODO
>
> Documentation/networking/mptcp-sysctl.rst | 10 +++
> net/mptcp/ctrl.c | 14 ++++
> net/mptcp/mib.c | 2 +
> net/mptcp/mib.h | 2 +
> net/mptcp/options.c | 23 ++++-
> net/mptcp/pm.c | 41 ++++++++-
> net/mptcp/protocol.c | 1 +
> net/mptcp/protocol.h | 7 ++
> net/mptcp/subflow.c | 30 ++++++-
> .../testing/selftests/net/mptcp/mptcp_join.sh | 84 +++++++++++++++++++
> 10 files changed, 207 insertions(+), 7 deletions(-)
>
> --
> 2.34.1
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 8+ messages in thread