From: Bobby Eshleman <bobbyeshleman@gmail.com>
To: "Stefano Garzarella" <sgarzare@redhat.com>,
"Shuah Khan" <shuah@kernel.org>,
"David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Jason Wang" <jasowang@redhat.com>,
"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
"Eugenio Pérez" <eperezma@redhat.com>,
"K. Y. Srinivasan" <kys@microsoft.com>,
"Haiyang Zhang" <haiyangz@microsoft.com>,
"Wei Liu" <wei.liu@kernel.org>,
"Dexuan Cui" <decui@microsoft.com>,
"Bryan Tan" <bryan-bt.tan@broadcom.com>,
"Vishnu Dasa" <vishnu.dasa@broadcom.com>,
"Broadcom internal kernel review list"
<bcm-kernel-feedback-list@broadcom.com>
Cc: virtualization@lists.linux.dev, netdev@vger.kernel.org,
linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org, linux-hyperv@vger.kernel.org,
Bobby Eshleman <bobbyeshleman@gmail.com>,
berrange@redhat.com, Bobby Eshleman <bobbyeshleman@meta.com>
Subject: [PATCH RFC net-next v4 03/12] vsock: add netns to af_vsock core
Date: Tue, 05 Aug 2025 14:49:11 -0700 [thread overview]
Message-ID: <20250805-vsock-vmtest-v4-3-059ec51ab111@meta.com> (raw)
In-Reply-To: <20250805-vsock-vmtest-v4-0-059ec51ab111@meta.com>
From: Bobby Eshleman <bobbyeshleman@meta.com>
Add netns functionality (initialization, passing the namespace to transports,
procfs, etc.) to the af_vsock socket layer. Later patches that add netns
support to the transports depend on this patch.
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
---
include/net/af_vsock.h | 13 +++-
net/vmw_vsock/af_vsock.c | 198 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 194 insertions(+), 17 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index d34bf7dbc69a..0c0c351394de 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -144,7 +144,7 @@ struct vsock_transport {
int flags);
int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
size_t len);
- bool (*seqpacket_allow)(u32 remote_cid);
+ bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);
u32 (*seqpacket_has_data)(struct vsock_sock *vsk);
/* Notification. */
@@ -214,9 +214,10 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
void vsock_insert_connected(struct vsock_sock *vsk);
void vsock_remove_bound(struct vsock_sock *vsk);
void vsock_remove_connected(struct vsock_sock *vsk);
-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
- struct sockaddr_vm *dst);
+ struct sockaddr_vm *dst,
+ struct net *net);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(struct vsock_transport *transport,
void (*fn)(struct sock *sk));
@@ -258,6 +259,12 @@ static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
return t->msgzerocopy_allow && t->msgzerocopy_allow();
}
+/* Accessor for __vsock_global_net, the single struct net object declared
+ * here; this patch defines and exports it from net/vmw_vsock/af_vsock.c.
+ * Returns a pointer to that object (never NULL, it is a static object).
+ * NOTE(review): presumably this stands in for "the global namespace" on
+ * behalf of transports that are not bound to a real netns - confirm against
+ * the transport patches later in the series.
+ */
+extern struct net __vsock_global_net;
+static inline struct net *vsock_global_net(void)
+{
+ return &__vsock_global_net;
+}
+
static inline u8 vsock_net_mode(struct net *net)
{
u8 ret;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 218d91e6b32b..c69c2db03162 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -83,6 +83,24 @@
* TCP_ESTABLISHED - connected
* TCP_CLOSING - disconnecting
* TCP_LISTEN - listening
+ *
+ * - Namespaces in vsock support two different modes configured
+ * through /proc/net/vsock_ns_mode. The modes are "local" and "global".
+ * Each mode defines how the namespace interacts with CIDs.
+ * /proc/net/vsock_ns_mode is write-once, so that it may be configured
+ * by a namespace manager. The default is "global". The mode is set
+ * per-namespace.
+ *
+ * The modes affect the allocation and accessibility of CIDs as follows:
+ * - global - aka fully public
+ * - CID allocation draws from the public pool
+ * - AF_VSOCK sockets may reach any CID allocated from the public pool
+ * - AF_VSOCK sockets may not reach CIDs allocated from private pools
+ *
+ * - local - aka fully private
+ * - CID allocation draws only from the namespace's private pool and does
+ *   not affect the public pool
+ * - AF_VSOCK sockets may only reach CIDs allocated from the private pool
+ * - AF_VSOCK sockets may not reach CIDs allocated outside the private pool
*/
#include <linux/compat.h>
@@ -100,6 +118,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
+#include <linux/proc_fs.h>
#include <linux/poll.h>
#include <linux/random.h>
#include <linux/skbuff.h>
@@ -111,6 +130,7 @@
#include <linux/workqueue.h>
#include <net/sock.h>
#include <net/af_vsock.h>
+#include <net/netns/vsock.h>
#include <uapi/linux/vm_sockets.h>
#include <uapi/asm-generic/ioctls.h>
@@ -149,6 +169,9 @@ static const struct vsock_transport *transport_dgram;
static const struct vsock_transport *transport_local;
static DEFINE_MUTEX(vsock_register_mutex);
+struct net __vsock_global_net;
+EXPORT_SYMBOL_GPL(__vsock_global_net);
+
/**** UTILS ****/
/* Each bound VSocket is stored in the bind hash table and each connected
@@ -235,33 +258,42 @@ static void __vsock_remove_connected(struct vsock_sock *vsk)
sock_put(&vsk->sk);
}
-static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
+static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr,
+ struct net *net)
{
struct vsock_sock *vsk;
list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
+ struct sock *sk = sk_vsock(vsk);
+
if (vsock_addr_equals_addr(addr, &vsk->local_addr))
- return sk_vsock(vsk);
+ if (vsock_net_check_mode(net, sock_net(sk)))
+ return sk;
if (addr->svm_port == vsk->local_addr.svm_port &&
(vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
- addr->svm_cid == VMADDR_CID_ANY))
- return sk_vsock(vsk);
+ addr->svm_cid == VMADDR_CID_ANY) &&
+ vsock_net_check_mode(net, sock_net(sk)))
+ return sk;
}
return NULL;
}
static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
- struct sockaddr_vm *dst)
+ struct sockaddr_vm *dst,
+ struct net *net)
{
struct vsock_sock *vsk;
list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
connected_table) {
+ struct sock *sk = sk_vsock(vsk);
+
if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
- dst->svm_port == vsk->local_addr.svm_port) {
- return sk_vsock(vsk);
+ dst->svm_port == vsk->local_addr.svm_port &&
+ vsock_net_check_mode(net, sock_net(sk))) {
+ return sk;
}
}
@@ -304,12 +336,12 @@ void vsock_remove_connected(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_remove_connected);
-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net)
{
struct sock *sk;
spin_lock_bh(&vsock_table_lock);
- sk = __vsock_find_bound_socket(addr);
+ sk = __vsock_find_bound_socket(addr, net);
if (sk)
sock_hold(sk);
@@ -320,12 +352,13 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
- struct sockaddr_vm *dst)
+ struct sockaddr_vm *dst,
+ struct net *net)
{
struct sock *sk;
spin_lock_bh(&vsock_table_lock);
- sk = __vsock_find_connected_socket(src, dst);
+ sk = __vsock_find_connected_socket(src, dst, net);
if (sk)
sock_hold(sk);
@@ -528,7 +561,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
if (sk->sk_type == SOCK_SEQPACKET) {
if (!new_transport->seqpacket_allow ||
- !new_transport->seqpacket_allow(remote_cid)) {
+ !new_transport->seqpacket_allow(vsk, remote_cid)) {
module_put(new_transport->module);
return -ESOCKTNOSUPPORT;
}
@@ -678,6 +711,7 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
{
static u32 port;
struct sockaddr_vm new_addr;
+ struct net *net = sock_net(sk_vsock(vsk));
if (!port)
port = get_random_u32_above(LAST_RESERVED_PORT);
@@ -694,7 +728,7 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
new_addr.svm_port = port++;
- if (!__vsock_find_bound_socket(&new_addr)) {
+ if (!__vsock_find_bound_socket(&new_addr, net)) {
found = true;
break;
}
@@ -711,7 +745,7 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
return -EACCES;
}
- if (__vsock_find_bound_socket(&new_addr))
+ if (__vsock_find_bound_socket(&new_addr, net))
return -EADDRINUSE;
}
@@ -2645,6 +2679,133 @@ static struct miscdevice vsock_device = {
.fops = &vsock_device_ops,
};
+#define VSOCK_NS_MODE_NAME_MAX 8
+
+/* Template for the per-namespace "net/vsock" sysctl directory.
+ *
+ * vsock_ns_mode exposes the namespace's mode byte (net->vsock.ns_mode).
+ * The backing field is a u8, so it must be handled by the u8 handler:
+ * proc_dostring() would treat the byte as a C string, reading past it
+ * whenever it is non-zero and only ever being able to store a NUL on write.
+ *
+ * The entry is read-only (0444). Changing the mode is done through the
+ * write-once /proc/net/vsock_ns_mode file, which checks
+ * vsock_net_mode_can_set(); a writable sysctl would bypass that check.
+ *
+ * .data points at init_net's field; vsock_sysctl_register() duplicates the
+ * table and repoints .data for every other namespace.
+ */
+static struct ctl_table vsock_table[] = {
+	{
+		.procname	= "vsock_ns_mode",
+		.data		= &init_net.vsock.ns_mode,
+		.maxlen		= sizeof(u8),
+		.mode		= 0444,
+		.proc_handler	= proc_dou8vec_minmax,
+	},
+};
+
+/* Register the "net/vsock" sysctl table for @net.
+ *
+ * init_net uses vsock_table directly. Every other namespace gets a
+ * kmemdup()'d copy whose first entry is repointed at that namespace's own
+ * ns_mode. The resulting header is stored in net->vsock.vsock_hdr.
+ *
+ * Returns 0 on success, -ENOMEM if the copy or the registration fails (a
+ * duplicated table is freed again in the latter case).
+ */
+static int __net_init vsock_sysctl_register(struct net *net)
+{
+	const bool is_init_net = net_eq(net, &init_net);
+	struct ctl_table *tbl = vsock_table;
+
+	if (!is_init_net) {
+		tbl = kmemdup(vsock_table, sizeof(vsock_table), GFP_KERNEL);
+		if (!tbl)
+			return -ENOMEM;
+
+		tbl[0].data = &net->vsock.ns_mode;
+	}
+
+	net->vsock.vsock_hdr = register_net_sysctl_sz(net, "net/vsock", tbl,
+						      ARRAY_SIZE(vsock_table));
+	if (net->vsock.vsock_hdr)
+		return 0;
+
+	/* Only the duplicated table is ours to free. */
+	if (!is_init_net)
+		kfree(tbl);
+	return -ENOMEM;
+}
+
+/* Undo vsock_sysctl_register() for @net.
+ *
+ * The table pointer is fetched from the header before the header is
+ * unregistered. Only non-init namespaces own a duplicated table, so only
+ * those free it.
+ */
+static void vsock_sysctl_unregister(struct net *net)
+{
+	const struct ctl_table *tbl = net->vsock.vsock_hdr->ctl_table_arg;
+
+	unregister_net_sysctl_table(net->vsock.vsock_hdr);
+
+	if (net_eq(net, &init_net))
+		return;
+
+	kfree(tbl);
+}
+
+#ifdef CONFIG_PROC_FS
+/* seq_file show callback for /proc/net/vsock_ns_mode.
+ *
+ * Samples the namespace's ns_mode under net->vsock.lock and prints
+ * "global" or "local". Any other value triggers a one-time warning and is
+ * printed as "invalid". Always returns 0.
+ */
+static int vsock_proc_ns_mode_show(struct seq_file *seq, void *v)
+{
+	struct net *net = seq_file_single_net(seq);
+	const char *name;
+	u8 mode;
+
+	spin_lock_bh(&net->vsock.lock);
+	mode = net->vsock.ns_mode;
+	spin_unlock_bh(&net->vsock.lock);
+
+	switch (mode) {
+	case VSOCK_NET_MODE_GLOBAL:
+		name = "global";
+		break;
+	case VSOCK_NET_MODE_LOCAL:
+		name = "local";
+		break;
+	default:
+		WARN_ONCE(1, "invalid vsock_ns_mode");
+		name = "invalid";
+		break;
+	}
+
+	seq_printf(seq, "%s", name);
+	return 0;
+}
+
+/* Write callback for /proc/net/vsock_ns_mode.
+ *
+ * @file: the opened proc file; its private_data is the seq_file
+ * @buf:  kernel copy of the user's data
+ * @size: number of bytes written (unused, the string is matched whole)
+ *
+ * Accepts exactly "global" or "local", optionally followed by one newline.
+ * The previous strncmp(buf, name, size - 1) was a prefix match: "g\n", or
+ * a lone "\n" (length 0), selected "global", and input without a trailing
+ * newline was compared one character short.
+ *
+ * sysfs_streq() needs a NUL-terminated @buf. NOTE(review): this relies on
+ * the procfs single-write helper handing over a memdup_user_nul() copy -
+ * confirm against fs/proc/generic.c.
+ *
+ * Returns 0 on success, -EPERM if vsock_net_mode_can_set() refuses a change
+ * for this namespace, -EINVAL for any other string.
+ */
+static int vsock_proc_ns_mode_write(struct file *file, char *buf, size_t size)
+{
+	struct seq_file *m = file->private_data;
+	struct net *net = seq_file_single_net(m);
+	u8 mode;
+
+	if (!vsock_net_mode_can_set(net))
+		return -EPERM;
+
+	if (sysfs_streq(buf, "global"))
+		mode = VSOCK_NET_MODE_GLOBAL;
+	else if (sysfs_streq(buf, "local"))
+		mode = VSOCK_NET_MODE_LOCAL;
+	else
+		return -EINVAL;
+
+	vsock_net_set_mode(net, mode);
+	return 0;
+}
+#endif /* CONFIG_PROC_FS */
+
+/* Give @net's vsock state its initial values: the mode starts out as
+ * VSOCK_NET_MODE_GLOBAL and the lock guarding it is initialised.
+ */
+static void vsock_net_init(struct net *net)
+{
+	net->vsock.ns_mode = VSOCK_NET_MODE_GLOBAL;
+	spin_lock_init(&net->vsock.lock);
+}
+
+/* pernet .init hook: set up vsock state for a new namespace.
+ *
+ * Initialises the per-net vsock fields, registers the "net/vsock" sysctl
+ * table and, when procfs is built in, creates /proc/net/vsock_ns_mode.
+ *
+ * Returns 0 on success or -ENOMEM on failure; the sysctl registration is
+ * rolled back if the proc entry cannot be created.
+ *
+ * The unwind is written inline instead of behind a goto label: the label
+ * was only referenced under CONFIG_PROC_FS, which produced an unused-label
+ * warning on builds without procfs.
+ */
+static __net_init int vsock_sysctl_init_net(struct net *net)
+{
+	vsock_net_init(net);
+
+	if (vsock_sysctl_register(net))
+		return -ENOMEM;
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_create_net_single_write("vsock_ns_mode", 0644, net->proc_net,
+					  vsock_proc_ns_mode_show,
+					  vsock_proc_ns_mode_write,
+					  NULL)) {
+		vsock_sysctl_unregister(net);
+		return -ENOMEM;
+	}
+#endif
+
+	return 0;
+}
+
+/* pernet .exit hook: tear down what vsock_sysctl_init_net() created.
+ *
+ * The /proc/net/vsock_ns_mode entry is removed as well as the sysctl table;
+ * previously the proc entry was left behind when the namespace went away.
+ */
+static __net_exit void vsock_sysctl_exit_net(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("vsock_ns_mode", net->proc_net);
+#endif
+	vsock_sysctl_unregister(net);
+}
+
+/* Per-network-namespace hooks, registered from vsock_init() with
+ * register_pernet_subsys().
+ */
+static struct pernet_operations __net_initdata vsock_sysctl_ops = {
+	.exit	= vsock_sysctl_exit_net,
+	.init	= vsock_sysctl_init_net,
+};
+
static int __init vsock_init(void)
{
int err = 0;
@@ -2672,10 +2833,19 @@ static int __init vsock_init(void)
goto err_unregister_proto;
}
+ if (register_pernet_subsys(&vsock_sysctl_ops)) {
+ err = -ENOMEM;
+ goto err_unregister_sock;
+ }
+
+ vsock_net_init(&init_net);
+ vsock_net_init(vsock_global_net());
vsock_bpf_build_proto();
return 0;
+err_unregister_sock:
+ sock_unregister(AF_VSOCK);
err_unregister_proto:
proto_unregister(&vsock_proto);
err_deregister_misc:
--
2.47.3
next prev parent reply other threads:[~2025-08-05 21:49 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-05 21:49 [PATCH RFC net-next v4 00/12] vsock: add namespace support to vhost-vsock Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 01/12] vsock: a per-net vsock NS mode state Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 02/12] vsock: add net to vsock skb cb Bobby Eshleman
2025-08-05 21:49 ` Bobby Eshleman [this message]
2025-08-07 1:35 ` [PATCH RFC net-next v4 03/12] vsock: add netns to af_vsock core kernel test robot
2025-08-05 21:49 ` [PATCH RFC net-next v4 04/12] vsock/virtio: add netns to virtio transport common Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 05/12] vhost/vsock: add netns support Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 06/12] vsock/virtio: use the global netns Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 07/12] hv_sock: add netns hooks Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 08/12] vsock/vmci: " Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 09/12] vsock/loopback: add netns support Bobby Eshleman
2025-08-06 19:12 ` Simon Horman
2025-08-06 21:31 ` Bobby Eshleman
2025-08-07 4:13 ` kernel test robot
2025-08-05 21:49 ` [PATCH RFC net-next v4 10/12] selftests/vsock: improve logging in vmtest.sh Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 11/12] selftests/vsock: invoke vsock_test through helpers Bobby Eshleman
2025-08-05 21:49 ` [PATCH RFC net-next v4 12/12] selftests/vsock: add namespace tests Bobby Eshleman
2025-08-05 22:03 ` [PATCH RFC net-next v4 00/12] vsock: add namespace support to vhost-vsock Bobby Eshleman
2025-08-06 19:13 ` Simon Horman
2025-08-06 21:31 ` Bobby Eshleman
2025-08-07 8:06 ` Stefano Garzarella
2025-08-07 20:24 ` Bobby Eshleman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250805-vsock-vmtest-v4-3-059ec51ab111@meta.com \
--to=bobbyeshleman@gmail.com \
--cc=bcm-kernel-feedback-list@broadcom.com \
--cc=berrange@redhat.com \
--cc=bobbyeshleman@meta.com \
--cc=bryan-bt.tan@broadcom.com \
--cc=davem@davemloft.net \
--cc=decui@microsoft.com \
--cc=edumazet@google.com \
--cc=eperezma@redhat.com \
--cc=haiyangz@microsoft.com \
--cc=horms@kernel.org \
--cc=jasowang@redhat.com \
--cc=kuba@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=kys@microsoft.com \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sgarzare@redhat.com \
--cc=shuah@kernel.org \
--cc=stefanha@redhat.com \
--cc=virtualization@lists.linux.dev \
--cc=vishnu.dasa@broadcom.com \
--cc=wei.liu@kernel.org \
--cc=xuanzhuo@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.