From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Willem de Bruijn <willemb@google.com>
Cc: Simon Horman <horms@kernel.org>,
Kuniyuki Iwashima <kuniyu@amazon.com>,
Kuniyuki Iwashima <kuni1840@gmail.com>,
Chuck Lever <chuck.lever@oracle.com>,
Jeff Layton <jlayton@kernel.org>,
Matthieu Baerts <matttbe@kernel.org>,
"Keith Busch" <kbusch@kernel.org>, Jens Axboe <axboe@kernel.dk>,
Christoph Hellwig <hch@lst.de>,
Wenjia Zhang <wenjia@linux.ibm.com>,
Jan Karcher <jaka@linux.ibm.com>,
Steve French <sfrench@samba.org>, <netdev@vger.kernel.org>,
<mptcp@lists.linux.dev>, <linux-nfs@vger.kernel.org>,
<linux-rdma@vger.kernel.org>, <linux-nvme@lists.infradead.org>
Subject: [PATCH v2 net-next 5/7] socket: Remove kernel socket conversion except for net/rds/.
Date: Fri, 23 May 2025 11:21:11 -0700 [thread overview]
Message-ID: <20250523182128.59346-6-kuniyu@amazon.com> (raw)
In-Reply-To: <20250523182128.59346-1-kuniyu@amazon.com>
Since commit 26abe14379f8 ("net: Modify sk_alloc to not reference
count the netns of kernel sockets."), TCP kernel sockets have caused
many use-after-free (UAF) bugs.
We have converted such sockets to hold a netns refcnt, and the same
pattern is repeated in cifs, mptcp, nvme, rds, smc, and sunrpc:
__sock_create_kern(..., &sock);
sk_net_refcnt_upgrade(sock->sk);
Let's drop the conversion and use sock_create_kern() instead.
The changes for cifs, mptcp, nvme, and smc are straightforward.
For sunrpc, we call sk_net_refcnt_upgrade() for IPPROTO_TCP only,
so we use sock_create_kern() for TCP and keep __sock_create_kern()
for the other protocols.
For rds, we cannot drop sk_net_refcnt_upgrade() for accept()ed
sockets.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> # net/mptcp
Acked-by: Chuck Lever <chuck.lever@oracle.com>
---
v2: Drop unnecessary change for sunrpc and update changelog for sunrpc
---
drivers/nvme/host/tcp.c | 7 +++----
fs/smb/client/connect.c | 12 ++----------
net/mptcp/subflow.c | 7 +------
net/smc/af_smc.c | 18 ++----------------
net/sunrpc/svcsock.c | 6 ++++--
net/sunrpc/xprtsock.c | 8 ++++----
6 files changed, 16 insertions(+), 42 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3d3bdc5e280f..fabb1cc02564 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1756,9 +1756,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
queue->cmnd_capsule_len = sizeof(struct nvme_command) +
NVME_TCP_ADMIN_CCSZ;
- ret = __sock_create_kern(current->nsproxy->net_ns,
- ctrl->addr.ss_family, SOCK_STREAM,
- IPPROTO_TCP, &queue->sock);
+ ret = sock_create_kern(current->nsproxy->net_ns,
+ ctrl->addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &queue->sock);
if (ret) {
dev_err(nctrl->device,
"failed to create socket: %d\n", ret);
@@ -1771,7 +1771,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
goto err_destroy_mutex;
}
- sk_net_refcnt_upgrade(queue->sock->sk);
nvme_tcp_reclassify_socket(queue->sock);
/* Single syn retry */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index afac23a5a3ec..c7b4f5a7cca1 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -3348,22 +3348,14 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket = server->ssocket;
} else {
struct net *net = cifs_net_ns(server);
- struct sock *sk;
- rc = __sock_create_kern(net, sfamily, SOCK_STREAM,
- IPPROTO_TCP, &server->ssocket);
+ rc = sock_create_kern(net, sfamily, SOCK_STREAM,
+ IPPROTO_TCP, &server->ssocket);
if (rc < 0) {
cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
return rc;
}
- sk = server->ssocket->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- net_passive_dec(net);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
-
/* BB other socket options to set KEEPALIVE, NODELAY? */
cifs_dbg(FYI, "Socket created\n");
socket = server->ssocket;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 602e689e991f..00e5cecb7683 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1757,7 +1757,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
if (unlikely(!sk->sk_socket))
return -EINVAL;
- err = __sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
+ err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
if (err)
return err;
@@ -1770,11 +1770,6 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
- /* kernel sockets do not by default acquire net ref, but TCP timer
- * needs it.
- * Update ns_tracker to current stack trace and refcounted tracker.
- */
- sk_net_refcnt_upgrade(sf->sk);
err = tcp_set_ulp(sf->sk, "mptcp");
if (err)
goto err_free;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index d998ffed1712..6140a9e386d0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -3328,22 +3328,8 @@ static const struct proto_ops smc_sock_ops = {
int smc_create_clcsk(struct net *net, struct sock *sk, int family)
{
- struct smc_sock *smc = smc_sk(sk);
- int rc;
-
- rc = __sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
- &smc->clcsock);
- if (rc)
- return rc;
-
- /* smc_clcsock_release() does not wait smc->clcsock->sk's
- * destruction; its sk_state might not be TCP_CLOSE after
- * smc->sk is close()d, and TCP timers can be fired later,
- * which need net ref.
- */
- sk = smc->clcsock->sk;
- sk_net_refcnt_upgrade(sk);
- return 0;
+ return sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc_sk(sk)->clcsock);
}
static int __smc_create(struct net *net, struct socket *sock, int protocol,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index adacfd03153a..10d83a03ccfa 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1516,7 +1516,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
return ERR_PTR(-EINVAL);
}
- error = __sock_create_kern(net, family, type, protocol, &sock);
+ if (protocol == IPPROTO_TCP)
+ error = sock_create_kern(net, family, type, protocol, &sock);
+ else
+ error = __sock_create_kern(net, family, type, protocol, &sock);
if (error < 0)
return ERR_PTR(error);
@@ -1541,7 +1544,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
- sk_net_refcnt_upgrade(sock->sk);
if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6fb921ce6cf2..f9576bd8f9c5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1924,7 +1924,10 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct socket *sock;
int err;
- err = __sock_create_kern(xprt->xprt_net, family, type, protocol, &sock);
+ if (protocol == IPPROTO_TCP)
+ err = sock_create_kern(xprt->xprt_net, family, type, protocol, &sock);
+ else
+ err = __sock_create_kern(xprt->xprt_net, family, type, protocol, &sock);
if (err < 0) {
dprintk("RPC: can't create %d transport socket (%d).\n",
protocol, -err);
@@ -1941,9 +1944,6 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
- if (protocol == IPPROTO_TCP)
- sk_net_refcnt_upgrade(sock->sk);
-
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
--
2.49.0
next prev parent reply other threads:[~2025-05-23 18:23 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-23 18:21 [PATCH v2 net-next 0/7] socket: Make sock_create_kern() robust against misuse Kuniyuki Iwashima
2025-05-23 18:21 ` [PATCH v2 net-next 1/7] socket: Un-export __sock_create() Kuniyuki Iwashima
2025-05-26 5:29 ` Christoph Hellwig
2025-05-26 10:06 ` David Laight
2025-05-30 2:42 ` Kuniyuki Iwashima
2025-05-23 18:21 ` [PATCH v2 net-next 2/7] socket: Rename sock_create_kern() to __sock_create_kern() Kuniyuki Iwashima
2025-05-26 5:30 ` Christoph Hellwig
2025-05-29 21:29 ` David Laight
2025-05-30 3:05 ` Kuniyuki Iwashima
2025-05-30 6:48 ` David Laight
2025-05-30 2:45 ` Kuniyuki Iwashima
2025-05-23 18:21 ` [PATCH v2 net-next 3/7] socket: Restore sock_create_kern() Kuniyuki Iwashima
2025-05-26 5:32 ` Christoph Hellwig
2025-05-30 2:53 ` Kuniyuki Iwashima
2025-06-02 5:08 ` Christoph Hellwig
2025-06-03 21:30 ` David Laight
2025-06-04 18:36 ` Kuniyuki Iwashima
2025-05-23 18:21 ` [PATCH v2 net-next 4/7] smb: client: Add missing net_passive_dec() Kuniyuki Iwashima
2025-05-23 18:21 ` Kuniyuki Iwashima [this message]
2025-05-26 5:33 ` [PATCH v2 net-next 5/7] socket: Remove kernel socket conversion except for net/rds/ Christoph Hellwig
2025-05-30 2:59 ` Kuniyuki Iwashima
2025-06-02 5:08 ` Christoph Hellwig
2025-05-23 18:21 ` [PATCH v2 net-next 6/7] socket: Replace most sock_create() calls with sock_create_kern() Kuniyuki Iwashima
2025-05-26 5:33 ` Christoph Hellwig
2025-05-26 5:35 ` Christoph Hellwig
2025-05-30 3:03 ` Kuniyuki Iwashima
2025-06-02 5:09 ` Christoph Hellwig
2025-06-02 21:52 ` Kuniyuki Iwashima
2025-06-03 4:50 ` Christoph Hellwig
2025-06-04 18:20 ` Kuniyuki Iwashima
2025-06-05 4:28 ` Christoph Hellwig
2025-05-23 18:21 ` [PATCH v2 net-next 7/7] socket: Clean up kdoc for sock_create() and sock_create_lite() Kuniyuki Iwashima
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250523182128.59346-6-kuniyu@amazon.com \
--to=kuniyu@amazon.com \
--cc=axboe@kernel.dk \
--cc=chuck.lever@oracle.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hch@lst.de \
--cc=horms@kernel.org \
--cc=jaka@linux.ibm.com \
--cc=jlayton@kernel.org \
--cc=kbusch@kernel.org \
--cc=kuba@kernel.org \
--cc=kuni1840@gmail.com \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=linux-rdma@vger.kernel.org \
--cc=matttbe@kernel.org \
--cc=mptcp@lists.linux.dev \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sfrench@samba.org \
--cc=wenjia@linux.ibm.com \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox