From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>,
Kuniyuki Iwashima <kuni1840@gmail.com>, <netdev@vger.kernel.org>,
Matthieu Baerts <matttbe@kernel.org>,
Allison Henderson <allison.henderson@oracle.com>,
"Steve French" <sfrench@samba.org>,
Wenjia Zhang <wenjia@linux.ibm.com>,
Jan Karcher <jaka@linux.ibm.com>,
Chuck Lever <chuck.lever@oracle.com>,
Jeff Layton <jlayton@kernel.org>
Subject: [PATCH v2 net-next 12/15] socket: Remove kernel socket conversion.
Date: Tue, 10 Dec 2024 16:38:26 +0900 [thread overview]
Message-ID: <20241210073829.62520-13-kuniyu@amazon.com> (raw)
In-Reply-To: <20241210073829.62520-1-kuniyu@amazon.com>
Since commit 26abe14379f8 ("net: Modify sk_alloc to not reference count
the netns of kernel sockets."), TCP kernel socket has caused many UAF.
We have converted such sockets to hold netns refcnt, and we have the
same pattern in cifs, mptcp, rds, smc, and sunrpc.
Let's drop the conversion and use sock_create_net() instead.
The changes for cifs, mptcp, and smc are straightforward.
For rds, we need to move maybe_get_net() before sock_create_net() and
sock->ops->accept().
For sunrpc, we call sock_create_net() for IPPROTO_TCP only and still
call sock_create_kern() for others.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Acked-by: Allison Henderson <allison.henderson@oracle.com>
---
v2: Collect Acked-by from MPTCP and RDS maintainers
Cc: Steve French <sfrench@samba.org>
Cc: Wenjia Zhang <wenjia@linux.ibm.com>
Cc: Jan Karcher <jaka@linux.ibm.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jeff Layton <jlayton@kernel.org>
---
fs/smb/client/connect.c | 12 ++----------
net/mptcp/subflow.c | 10 +---------
net/rds/tcp.c | 13 -------------
net/rds/tcp_connect.c | 15 +++++++++++----
net/rds/tcp_listen.c | 14 ++++++++++++--
net/smc/af_smc.c | 20 ++------------------
net/sunrpc/svcsock.c | 12 ++++++------
net/sunrpc/xprtsock.c | 11 ++++-------
8 files changed, 38 insertions(+), 69 deletions(-)
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 9f6daa32c083..adac00cad15a 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -3132,21 +3132,13 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (server->ssocket) {
socket = server->ssocket;
} else {
- struct net *net = cifs_net_ns(server);
- struct sock *sk;
-
- rc = sock_create_kern(net, sfamily, SOCK_STREAM,
- IPPROTO_TCP, &server->ssocket);
+ rc = sock_create_net(cifs_net_ns(server), sfamily, SOCK_STREAM,
+ IPPROTO_TCP, &server->ssocket);
if (rc < 0) {
cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
return rc;
}
- sk = server->ssocket->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
-
/* BB other socket options to set KEEPALIVE, NODELAY? */
cifs_dbg(FYI, "Socket created\n");
socket = server->ssocket;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fc534290f119..e7e8972bdfca 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1755,7 +1755,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
if (unlikely(!sk->sk_socket))
return -EINVAL;
- err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
+ err = sock_create_net(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
if (err)
return err;
@@ -1768,14 +1768,6 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
- /* kernel sockets do not by default acquire net ref, but TCP timer
- * needs it.
- * Update ns_tracker to current stack trace and refcounted tracker.
- */
- __netns_tracker_free(net, &sf->sk->ns_tracker, false);
- sf->sk->sk_net_refcnt = 1;
- get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
-
err = tcp_set_ulp(sf->sk, "mptcp");
if (err)
goto err_free;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index f7e8a309f678..4509900476f7 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -494,20 +494,7 @@ bool rds_tcp_tune(struct socket *sock)
tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
- /* TCP timer functions might access net namespace even after
- * a process which created this net namespace terminated.
- */
- if (!sk->sk_net_refcnt) {
- if (!maybe_get_net(net)) {
- release_sock(sk);
- return false;
- }
- /* Update ns_tracker to current stack trace and refcounted tracker */
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
- }
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index a0046e99d6df..19d7e6f7323d 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -93,6 +93,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
struct sockaddr_in6 sin6;
struct sockaddr_in sin;
struct sockaddr *addr;
+ struct net *net;
int addrlen;
bool isv6;
int ret;
@@ -107,20 +108,26 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
mutex_lock(&tc->t_conn_path_lock);
+ net = rds_conn_net(conn);
+
if (rds_conn_path_up(cp)) {
mutex_unlock(&tc->t_conn_path_lock);
return 0;
}
+
+ if (!maybe_get_net(net))
+ return -EINVAL;
+
if (ipv6_addr_v4mapped(&conn->c_laddr)) {
- ret = sock_create_kern(rds_conn_net(conn), PF_INET,
- SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_net(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
isv6 = false;
} else {
- ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
- SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_net(net, PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
isv6 = true;
}
+ put_net(net);
+
if (ret < 0)
goto out;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 69aaf03ab93e..440ac9057148 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -101,6 +101,7 @@ int rds_tcp_accept_one(struct socket *sock)
struct rds_connection *conn;
int ret;
struct inet_sock *inet;
+ struct net *net;
struct rds_tcp_connection *rs_tcp = NULL;
int conn_state;
struct rds_conn_path *cp;
@@ -108,7 +109,7 @@ int rds_tcp_accept_one(struct socket *sock)
struct proto_accept_arg arg = {
.flags = O_NONBLOCK,
.kern = true,
- .hold_net = false,
+ .hold_net = true,
};
#if !IS_ENABLED(CONFIG_IPV6)
struct in6_addr saddr, daddr;
@@ -118,13 +119,22 @@ int rds_tcp_accept_one(struct socket *sock)
if (!sock) /* module unload or netns delete in progress */
return -ENETUNREACH;
+ net = sock_net(sock->sk);
+
+ if (!maybe_get_net(net))
+ return -EINVAL;
+
ret = sock_create_lite(sock->sk->sk_family,
sock->sk->sk_type, sock->sk->sk_protocol,
&new_sock);
- if (ret)
+ if (ret) {
+ put_net(net);
goto out;
+ }
ret = sock->ops->accept(sock, new_sock, &arg);
+ put_net(net);
+
if (ret < 0)
goto out;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 10f9968f87b1..d53d4e14173d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -3307,24 +3307,8 @@ static const struct proto_ops smc_sock_ops = {
int smc_create_clcsk(struct net *net, struct sock *sk, int family)
{
- struct smc_sock *smc = smc_sk(sk);
- int rc;
-
- rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
- &smc->clcsock);
- if (rc)
- return rc;
-
- /* smc_clcsock_release() does not wait smc->clcsock->sk's
- * destruction; its sk_state might not be TCP_CLOSE after
- * smc->sk is close()d, and TCP timers can be fired later,
- * which need net ref.
- */
- sk = smc->clcsock->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- return 0;
+ return sock_create_net(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc_sk(sk)->clcsock);
}
static int __smc_create(struct net *net, struct socket *sock, int protocol,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index bdea406308a8..cde5765f6f81 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1526,7 +1526,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
return ERR_PTR(-EINVAL);
}
- error = sock_create_kern(net, family, type, protocol, &sock);
+ if (protocol == IPPROTO_TCP)
+ error = sock_create_net(net, family, type, protocol, &sock);
+ else
+ error = sock_create_kern(net, family, type, protocol, &sock);
if (error < 0)
return ERR_PTR(error);
@@ -1551,11 +1554,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
- __netns_tracker_free(net, &sock->sk->ns_tracker, false);
- sock->sk->sk_net_refcnt = 1;
- get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
-
- if ((error = kernel_listen(sock, 64)) < 0)
+ error = kernel_listen(sock, 64);
+ if (error < 0)
goto bummer;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1bc3a480d919..f3e139c30442 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1924,7 +1924,10 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct socket *sock;
int err;
- err = sock_create_kern(xprt->xprt_net, family, type, protocol, &sock);
+ if (protocol == IPPROTO_TCP)
+ err = sock_create_net(xprt->xprt_net, family, type, protocol, &sock);
+ else
+ err = sock_create_kern(xprt->xprt_net, family, type, protocol, &sock);
if (err < 0) {
dprintk("RPC: can't create %d transport socket (%d).\n",
protocol, -err);
@@ -1941,12 +1944,6 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
- if (protocol == IPPROTO_TCP) {
- __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
- sock->sk->sk_net_refcnt = 1;
- get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
- }
-
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
--
2.39.5 (Apple Git-154)
next prev parent reply other threads:[~2024-12-10 7:42 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-10 7:38 [PATCH v2 net-next 00/15] treewide: socket: Clean up sock_create() and friends Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 01/15] socket: Un-export __sock_create() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 02/15] socket: Pass hold_net flag to __sock_create() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 03/15] smc: Pass kern to smc_sock_alloc() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 04/15] socket: Pass hold_net to struct net_proto_family.create() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 05/15] ppp: Pass hold_net to struct pppox_proto.create() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 06/15] nfc: Pass hold_net to struct nfc_protocol.create() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 07/15] socket: Add hold_net flag to struct proto_accept_arg Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 08/15] socket: Pass hold_net to sk_alloc() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 09/15] socket: Respect hold_net in sk_alloc() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 10/15] socket: Don't count kernel sockets in /proc/net/sockstat Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 11/15] socket: Introduce sock_create_net() Kuniyuki Iwashima
2024-12-10 7:38 ` Kuniyuki Iwashima [this message]
2024-12-11 2:20 ` [PATCH v2 net-next 12/15] socket: Remove kernel socket conversion Jakub Kicinski
2024-12-12 17:35 ` Allison Henderson
2024-12-13 8:28 ` Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 13/15] socket: Use sock_create_net() instead of sock_create() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 14/15] socket: Rename sock_create() to sock_create_user() Kuniyuki Iwashima
2024-12-10 7:38 ` [PATCH v2 net-next 15/15] socket: Rename sock_create_kern() to sock_create_net_noref() Kuniyuki Iwashima
2024-12-10 8:46 ` [PATCH v2 net-next 00/15] treewide: socket: Clean up sock_create() and friends Eric Dumazet
2024-12-10 9:47 ` Kuniyuki Iwashima
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241210073829.62520-13-kuniyu@amazon.com \
--to=kuniyu@amazon.com \
--cc=allison.henderson@oracle.com \
--cc=chuck.lever@oracle.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=jaka@linux.ibm.com \
--cc=jlayton@kernel.org \
--cc=kuba@kernel.org \
--cc=kuni1840@gmail.com \
--cc=matttbe@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sfrench@samba.org \
--cc=wenjia@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).