From: Allison Henderson <achender@kernel.org>
To: netdev@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org, pabeni@redhat.com,
edumazet@google.com, rds-devel@oss.oracle.com, kuba@kernel.org,
horms@kernel.org, linux-rdma@vger.kernel.org,
allison.henderson@oracle.com
Subject: [PATCH net-next v3 3/4] net/rds: Delegate fan-out to a background worker
Date: Tue, 10 Feb 2026 02:12:34 -0700 [thread overview]
Message-ID: <20260210091235.1817860-4-achender@kernel.org> (raw)
In-Reply-To: <20260210091235.1817860-1-achender@kernel.org>
From: Gerd Rausch <gerd.rausch@oracle.com>
Delegate fan-out to a background worker in order to allow
kernel_getpeername() to acquire a lock on the socket.
This has become necessary since the introduction of
commit 9dfc685e0262d ("inet: remove races in inet{6}_getname()").
The socket is already locked, by either "rds_tcp_recv_path"
or "tcp_v{4,6}_rcv", in the context in which
"kernel_getpeername" used to get called,
therefore causing a deadlock.
Luckily, the fan-out need not happen in-context nor fast,
so we can easily just do the same in a background worker.
Also, while we're doing this, we get rid of the unused
struct members "t_conn_w", "t_send_w", "t_down_w" & "t_recv_w".
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Allison Henderson <achender@kernel.org>
---
net/rds/tcp.c | 3 +++
net/rds/tcp.h | 7 ++----
net/rds/tcp_connect.c | 2 ++
net/rds/tcp_listen.c | 54 +++++++++++++++++++++++++++++++------------
4 files changed, 46 insertions(+), 20 deletions(-)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 45484a93d75f..02f8f928c20b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -358,6 +358,8 @@ static void rds_tcp_conn_free(void *arg)
rdsdebug("freeing tc %p\n", tc);
+ cancel_work_sync(&tc->t_fan_out_w);
+
spin_lock_irqsave(&rds_tcp_conn_lock, flags);
if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
@@ -384,6 +386,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
tc->t_tinc = NULL;
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
tc->t_tinc_data_rem = 0;
+ INIT_WORK(&tc->t_fan_out_w, rds_tcp_fan_out_w);
init_waitqueue_head(&tc->t_recv_done_waitq);
conn->c_path[i].cp_transport_data = tc;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 39c86347188c..9ecb0b6b658a 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -44,11 +44,7 @@ struct rds_tcp_connection {
size_t t_tinc_hdr_rem;
size_t t_tinc_data_rem;
- /* XXX error report? */
- struct work_struct t_conn_w;
- struct work_struct t_send_w;
- struct work_struct t_down_w;
- struct work_struct t_recv_w;
+ struct work_struct t_fan_out_w;
/* for info exporting only */
struct list_head t_list_item;
@@ -90,6 +86,7 @@ void rds_tcp_state_change(struct sock *sk);
struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
+void rds_tcp_fan_out_w(struct work_struct *work);
void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out);
int rds_tcp_accept_one(struct rds_tcp_net *rtn);
void rds_tcp_keepalive(struct socket *sock);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index b77c88ffb199..6954b8c479f1 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -115,6 +115,8 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
if (cp->cp_index > 0 && cp->cp_conn->c_npaths < 2)
return -EAGAIN;
+ cancel_work_sync(&tc->t_fan_out_w);
+
mutex_lock(&tc->t_conn_path_lock);
if (rds_conn_path_up(cp)) {
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index cdc86473a1ba..f2c4778be0b3 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -123,27 +123,20 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
return NULL;
}
-void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out)
+void rds_tcp_fan_out_w(struct work_struct *work)
{
- struct rds_tcp_connection *tc;
- struct rds_tcp_net *rtn;
- struct socket *sock;
+ struct rds_tcp_connection *tc = container_of(work,
+ struct rds_tcp_connection,
+ t_fan_out_w);
+ struct rds_connection *conn = tc->t_cpath->cp_conn;
+ struct rds_tcp_net *rtn = tc->t_rtn;
+ struct socket *sock = tc->t_sock;
int sport, npaths;
- if (rds_destroy_pending(conn))
- return;
-
- tc = conn->c_path->cp_transport_data;
- rtn = tc->t_rtn;
- if (!rtn)
- return;
-
- sock = tc->t_sock;
-
/* During fan-out, check that the connection we already
* accepted in slot#0 carried the proper source port modulo.
*/
- if (fan_out && conn->c_with_sport_idx && sock &&
+ if (conn->c_with_sport_idx && sock &&
rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0) {
/* cp->cp_index is encoded in lowest bits of source-port */
sport = rds_tcp_get_peer_sport(sock);
@@ -167,6 +160,37 @@ void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out)
rds_tcp_accept_work(rtn);
}
+void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out)
+{
+ struct rds_conn_path *cp0;
+ struct rds_tcp_connection *tc;
+ struct rds_tcp_net *rtn;
+
+ if (rds_destroy_pending(conn))
+ return;
+
+ cp0 = conn->c_path;
+ tc = cp0->cp_transport_data;
+ rtn = tc->t_rtn;
+ if (!rtn)
+ return;
+
+ if (fan_out)
+ /* Delegate fan-out to a background worker in order
+ * to allow "kernel_getpeername" to acquire a lock
+ * on the socket.
+ * The socket is already locked in this context
+ * by either "rds_tcp_recv_path" or "tcp_v{4,6}_rcv",
+ * depending on the origin of the dequeue-request.
+ */
+ queue_work(cp0->cp_wq, &tc->t_fan_out_w);
+ else
+ /* Fan-out either already happened or is unnecessary.
+ * Just go ahead and attempt to accept more connections
+ */
+ rds_tcp_accept_work(rtn);
+}
+
int rds_tcp_accept_one(struct rds_tcp_net *rtn)
{
struct socket *listen_sock = rtn->rds_tcp_listen_sock;
--
2.43.0
next prev parent reply other threads:[~2026-02-10 9:12 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-10 9:12 [PATCH net-next v3 0/4] net/rds: RDS-TCP reconnect and fanout improvements Allison Henderson
2026-02-10 9:12 ` [PATCH net-next v3 1/4] net/rds: Fix NULL pointer dereference in rds_tcp_accept_one Allison Henderson
2026-02-10 9:12 ` [PATCH net-next v3 2/4] net/rds: Refactor __rds_conn_create for blocking transport cleanup Allison Henderson
2026-02-10 9:12 ` Allison Henderson [this message]
2026-02-10 9:12 ` [PATCH net-next v3 4/4] net/rds: Use proper peer port number even when not connected Allison Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260210091235.1817860-4-achender@kernel.org \
--to=achender@kernel.org \
--cc=allison.henderson@oracle.com \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=rds-devel@oss.oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox