* [PATCH mptcp-next v2 0/2] mptcp: Support recording MPTCP subflows in RPS table
@ 2025-08-26 4:30 Christoph Paasch
2025-08-26 4:30 ` [PATCH mptcp-next v2 1/2] net: Add rfs_needed() helper Christoph Paasch
2025-08-26 4:30 ` [PATCH mptcp-next v2 2/2] mptcp: record subflows in RPS table Christoph Paasch
0 siblings, 2 replies; 3+ messages in thread
From: Christoph Paasch @ 2025-08-26 4:30 UTC (permalink / raw)
To: MPTCP Upstream, Matthieu Baerts, Mat Martineau, Geliang Tang
Cc: Christoph Paasch
If we want to steer interrupts to the right CPU-core with aRFS, we need
to record the subflows in the RPS-table.
This patchset achieves this by iterating over the subflows and calling
sock_rps_record_flow().
Patch 1/2 adds a helper (rfs_is_needed()) to allow checking whether RFS
is enabled before iterating over the subflows. The helper also makes it
possible to rework the code in rps.h so that it is more streamlined.
Signed-off-by: Christoph Paasch <cpaasch@openai.com>
---
Changes in v2:
- Introduce patch 1/2 which adds rfs_is_needed (Matthieu Baerts <matttbe@kernel.org>)
- Use helper rfs_is_needed() to decide whether or not to iterate over
subflows (Matthieu Baerts <matttbe@kernel.org>)
- Also call mptcp_rps_record_subflows() in mptcp_stream_accept (Matthieu Baerts <matttbe@kernel.org>)
- Link to v1: https://lore.kernel.org/r/20250807-b4-mptcp_perf-v1-1-e89cc14179cc@openai.com
---
Christoph Paasch (2):
net: Add rfs_needed() helper
mptcp: record subflows in RPS table
include/net/rps.h | 85 ++++++++++++++++++++++++++++++++++------------------
net/mptcp/protocol.c | 21 +++++++++++++
2 files changed, 77 insertions(+), 29 deletions(-)
---
base-commit: 4a7f79b50b675c068ff94cd750d5a9f03fd3a0fa
change-id: 20250731-b4-mptcp_perf-02912dd4f65b
Best regards,
--
Christoph Paasch <cpaasch@openai.com>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH mptcp-next v2 1/2] net: Add rfs_needed() helper
2025-08-26 4:30 [PATCH mptcp-next v2 0/2] mptcp: Support recording MPTCP subflows in RPS table Christoph Paasch
@ 2025-08-26 4:30 ` Christoph Paasch
2025-08-26 4:30 ` [PATCH mptcp-next v2 2/2] mptcp: record subflows in RPS table Christoph Paasch
1 sibling, 0 replies; 3+ messages in thread
From: Christoph Paasch @ 2025-08-26 4:30 UTC (permalink / raw)
To: MPTCP Upstream, Matthieu Baerts, Mat Martineau, Geliang Tang
Cc: Christoph Paasch
Add a helper to check whether RFS is needed. This makes the code a bit
cleaner and lets the next patch have MPTCP use this helper to decide
whether or not to iterate over the subflows.
tun_flow_update() was calling sock_rps_record_flow_hash() regardless of
the state of rfs_needed. This was not really a bug, as sock_flow_table
simply ends up being NULL and thus everything will be fine.
This commit therefore also implicitly makes tun_flow_update() respect
the state of rfs_needed.
Suggested-by: Matthieu Baerts <matttbe@kernel.org>
Signed-off-by: Christoph Paasch <cpaasch@openai.com>
---
include/net/rps.h | 85 ++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 56 insertions(+), 29 deletions(-)
diff --git a/include/net/rps.h b/include/net/rps.h
index d8ab3a08bcc4882e2ad9c84c22ef26b254c14680..a89d2822a257a1fcf54e9722fdc22d01c28cc87b 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -82,11 +82,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
WRITE_ONCE(table->ents[index], val);
}
-#endif /* CONFIG_RPS */
-
-static inline void sock_rps_record_flow_hash(__u32 hash)
+static inline void _sock_rps_record_flow_hash(__u32 hash)
{
-#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
if (!hash)
@@ -96,42 +93,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
if (sock_flow_table)
rps_record_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
-#endif
}
-static inline void sock_rps_record_flow(const struct sock *sk)
+static inline void _sock_rps_record_flow(const struct sock *sk)
{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
*/
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
+ _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
}
-#endif
}
-static inline void sock_rps_delete_flow(const struct sock *sk)
+static inline void _sock_rps_delete_flow(const struct sock *sk)
{
-#ifdef CONFIG_RPS
struct rps_sock_flow_table *table;
u32 hash, index;
- if (!static_branch_unlikely(&rfs_needed))
- return;
-
hash = READ_ONCE(sk->sk_rxhash);
if (!hash)
return;
@@ -144,6 +132,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk)
WRITE_ONCE(table->ents[index], RPS_NO_CPU);
}
rcu_read_unlock();
+}
+#endif /* CONFIG_RPS */
+
+static inline bool rfs_is_needed(void)
+{
+#ifdef CONFIG_RPS
+ return static_branch_unlikely(&rfs_needed);
+#else
+ return false;
+#endif
+}
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_record_flow_hash(hash);
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_record_flow(sk);
+#endif
+}
+
+static inline void sock_rps_delete_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (!rfs_is_needed())
+ return;
+
+ _sock_rps_delete_flow(sk);
#endif
}
--
2.50.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH mptcp-next v2 2/2] mptcp: record subflows in RPS table
2025-08-26 4:30 [PATCH mptcp-next v2 0/2] mptcp: Support recording MPTCP subflows in RPS table Christoph Paasch
2025-08-26 4:30 ` [PATCH mptcp-next v2 1/2] net: Add rfs_needed() helper Christoph Paasch
@ 2025-08-26 4:30 ` Christoph Paasch
1 sibling, 0 replies; 3+ messages in thread
From: Christoph Paasch @ 2025-08-26 4:30 UTC (permalink / raw)
To: MPTCP Upstream, Matthieu Baerts, Mat Martineau, Geliang Tang
Cc: Christoph Paasch
Accelerated Receive Flow Steering (aRFS) relies on sockets recording
their RX flow hash into the rps_sock_flow_table so that incoming packets
are steered to the CPU where the application runs.
With MPTCP, the application interacts with the parent MPTCP socket while
data is carried over per-subflow TCP sockets. Without recording these
subflows, aRFS cannot steer interrupts and RX processing for the flows
to the desired CPU.
Record all subflows in the RPS table by calling sock_rps_record_flow()
for each subflow at the start of mptcp_sendmsg(), mptcp_recvmsg() and
mptcp_stream_accept(), by using the new helper
mptcp_rps_record_subflows().
It does not by itself improve throughput, but ensures that IRQ and RX
processing are directed to the right CPU, which is a
prerequisite for effective aRFS.
Signed-off-by: Christoph Paasch <cpaasch@openai.com>
---
net/mptcp/protocol.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4b510e04724f5c8be08a00a1cc03093bcd031905..543506b6a011af78208381857ab9c1962cbc6636 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -12,6 +12,7 @@
#include <linux/sched/signal.h>
#include <linux/atomic.h>
#include <net/aligned_data.h>
+#include <net/rps.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -1740,6 +1741,20 @@ static u32 mptcp_send_limit(const struct sock *sk)
return limit - not_sent;
}
+static void mptcp_rps_record_subflows(const struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+
+ if (!rfs_is_needed())
+ return;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ sock_rps_record_flow(ssk);
+ }
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1753,6 +1768,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
+ mptcp_rps_record_subflows(msk);
+
if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
@@ -2131,6 +2148,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
+ mptcp_rps_record_subflows(msk);
+
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
@@ -3921,6 +3940,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
mptcp_sock_graft(ssk, newsock);
}
+ mptcp_rps_record_subflows(msk);
+
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
--
2.50.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-08-26 4:30 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-26 4:30 [PATCH mptcp-next v2 0/2] mptcp: Support recording MPTCP subflows in RPS table Christoph Paasch
2025-08-26 4:30 ` [PATCH mptcp-next v2 1/2] net: Add rfs_needed() helper Christoph Paasch
2025-08-26 4:30 ` [PATCH mptcp-next v2 2/2] mptcp: record subflows in RPS table Christoph Paasch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).