From: Bobby Eshleman <bobbyeshleman@gmail.com>
To: "Stefano Garzarella" <sgarzare@redhat.com>,
"Shuah Khan" <shuah@kernel.org>,
"David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>,
"Stefan Hajnoczi" <stefanha@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Jason Wang" <jasowang@redhat.com>,
"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
"Eugenio Pérez" <eperezma@redhat.com>,
"K. Y. Srinivasan" <kys@microsoft.com>,
"Haiyang Zhang" <haiyangz@microsoft.com>,
"Wei Liu" <wei.liu@kernel.org>,
"Dexuan Cui" <decui@microsoft.com>,
"Bryan Tan" <bryan-bt.tan@broadcom.com>,
"Vishnu Dasa" <vishnu.dasa@broadcom.com>,
"Broadcom internal kernel review list"
<bcm-kernel-feedback-list@broadcom.com>,
"Bobby Eshleman" <bobbyeshleman@gmail.com>
Cc: virtualization@lists.linux.dev, netdev@vger.kernel.org,
linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org, linux-hyperv@vger.kernel.org,
Sargun Dhillon <sargun@sargun.me>,
berrange@redhat.com, Bobby Eshleman <bobbyeshleman@meta.com>
Subject: [PATCH net-next v9 07/14] vhost/vsock: add netns support
Date: Tue, 11 Nov 2025 22:54:49 -0800 [thread overview]
Message-ID: <20251111-vsock-vmtest-v9-7-852787a37bed@meta.com> (raw)
In-Reply-To: <20251111-vsock-vmtest-v9-0-852787a37bed@meta.com>
From: Bobby Eshleman <bobbyeshleman@meta.com>
Add the ability to isolate vhost-vsock flows using namespaces.
The VM, via the vhost_vsock struct, inherits its namespace from the
process that opens the vhost-vsock device. The vhost_vsock lookup
functions now take the caller's namespace and mode into account (e.g.,
if the CIDs match but vsock_net_check_mode() rejects the namespace/mode
pair, the lookup returns NULL).
When namespace modes are evaluated during socket usage, we always use
the mode the namespace had at the time the vhost-vsock device file was
opened. If that namespace is later changed from "global" to "local"
mode, the vsock will continue operating as if the change never happened
(i.e., as if it were still in "global" mode). This avoids breaking
already established flows.
vhost_vsock now acquires a tracked reference to the namespace when the
device is opened and drops it when the device is released.
Suggested-by: Sargun Dhillon <sargun@sargun.me>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
---
Changes in v9:
- add more information about net_mode and rationale (changing modes) to
both code comment and commit message
Changes in v7:
- remove the check_global flag of vhost_vsock_get(); that logic was both
wrong and unnecessary, so reuse vsock_net_check_mode() instead
- remove 'delete me' comment
Changes in v5:
- respect pid namespaces when assigning namespace to vhost_vsock
---
drivers/vhost/vsock.c | 42 ++++++++++++++++++++++++++++++++----------
1 file changed, 32 insertions(+), 10 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0a0e73405532..09f9321e4bc8 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -46,6 +46,11 @@ static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
struct vhost_vsock {
struct vhost_dev dev;
struct vhost_virtqueue vqs[2];
+ struct net *net;
+ netns_tracker ns_tracker;
+
+ /* The ns mode at the time vhost_vsock was created */
+ enum vsock_net_mode net_mode;
/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
struct hlist_node hash;
@@ -67,7 +72,8 @@ static u32 vhost_transport_get_local_cid(void)
/* Callers that dereference the return value must hold vhost_vsock_mutex or the
* RCU read lock.
*/
-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid, struct net *net,
+ enum vsock_net_mode mode)
{
struct vhost_vsock *vsock;
@@ -78,9 +84,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
if (other_cid == 0)
continue;
- if (other_cid == guest_cid)
+ if (other_cid == guest_cid &&
+ vsock_net_check_mode(net, mode, vsock->net, vsock->net_mode))
return vsock;
-
}
return NULL;
@@ -279,7 +285,7 @@ vhost_transport_send_pkt(struct sk_buff *skb, struct net *net,
rcu_read_lock();
/* Find the vhost_vsock according to guest context id */
- vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
+ vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid), net, net_mode);
if (!vsock) {
rcu_read_unlock();
kfree_skb(skb);
@@ -306,7 +312,8 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
rcu_read_lock();
/* Find the vhost_vsock according to guest context id */
- vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
+ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid,
+ sock_net(sk_vsock(vsk)), vsk->net_mode);
if (!vsock)
goto out;
@@ -463,11 +470,12 @@ static struct virtio_transport vhost_transport = {
static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
{
+ struct net *net = sock_net(sk_vsock(vsk));
struct vhost_vsock *vsock;
bool seqpacket_allow = false;
rcu_read_lock();
- vsock = vhost_vsock_get(remote_cid);
+ vsock = vhost_vsock_get(remote_cid, net, vsk->net_mode);
if (vsock)
seqpacket_allow = vsock->seqpacket_allow;
@@ -538,8 +546,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
le64_to_cpu(hdr->dst_cid) ==
vhost_transport_get_local_cid())
- virtio_transport_recv_pkt(&vhost_transport, skb, NULL,
- 0);
+ virtio_transport_recv_pkt(&vhost_transport, skb,
+ vsock->net, vsock->net_mode);
else
kfree_skb(skb);
@@ -654,8 +662,10 @@ static void vhost_vsock_free(struct vhost_vsock *vsock)
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
{
+
struct vhost_virtqueue **vqs;
struct vhost_vsock *vsock;
+ struct net *net;
int ret;
/* This struct is large and allocation could fail, fall back to vmalloc
@@ -671,6 +681,17 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
goto out;
}
+ net = current->nsproxy->net_ns;
+ vsock->net = get_net_track(net, &vsock->ns_tracker, GFP_KERNEL);
+
+ /* Store the mode of the namespace at the time of creation. If this
+ * namespace later changes from "global" to "local", we want this vsock
+ * to continue operating normally and not suddenly break. For that
+ * reason, we save the mode here and later use it when performing
+ * socket lookups with vsock_net_check_mode() (see vhost_vsock_get()).
+ */
+ vsock->net_mode = vsock_net_mode(net);
+
vsock->guest_cid = 0; /* no CID assigned yet */
vsock->seqpacket_allow = false;
@@ -710,7 +731,7 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
*/
/* If the peer is still valid, no need to reset connection */
- if (vhost_vsock_get(vsk->remote_addr.svm_cid))
+ if (vhost_vsock_get(vsk->remote_addr.svm_cid, sock_net(sk), vsk->net_mode))
return;
/* If the close timeout is pending, let it expire. This avoids races
@@ -755,6 +776,7 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
vhost_dev_cleanup(&vsock->dev);
+ put_net_track(vsock->net, &vsock->ns_tracker);
kfree(vsock->dev.vqs);
vhost_vsock_free(vsock);
return 0;
@@ -781,7 +803,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
/* Refuse if CID is already in use */
mutex_lock(&vhost_vsock_mutex);
- other = vhost_vsock_get(guest_cid);
+ other = vhost_vsock_get(guest_cid, vsock->net, vsock->net_mode);
if (other && other != vsock) {
mutex_unlock(&vhost_vsock_mutex);
return -EADDRINUSE;
--
2.47.3
next prev parent reply other threads:[~2025-11-12 6:55 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-12 6:54 [PATCH net-next v9 00/14] vsock: add namespace support to vhost-vsock and loopback Bobby Eshleman
2025-11-12 6:54 ` [PATCH net-next v9 01/14] vsock: a per-net vsock NS mode state Bobby Eshleman
2025-11-12 14:13 ` Stefano Garzarella
2025-11-12 6:54 ` [PATCH net-next v9 02/14] vsock: add netns to vsock core Bobby Eshleman
2025-11-12 14:14 ` Stefano Garzarella
2025-11-12 6:54 ` [PATCH net-next v9 03/14] vsock/virtio: add netns support to virtio transport and virtio common Bobby Eshleman
2025-11-12 14:18 ` Stefano Garzarella
2025-11-12 16:13 ` Bobby Eshleman
2025-11-12 17:39 ` Stefano Garzarella
2025-11-12 19:32 ` Bobby Eshleman
2025-11-13 15:31 ` Stefano Garzarella
2025-11-12 6:54 ` [PATCH net-next v9 04/14] vsock/virtio: pack struct virtio_vsock_skb_cb Bobby Eshleman
2025-11-12 6:54 ` [PATCH net-next v9 05/14] vsock: add netns and netns_tracker to vsock skb cb Bobby Eshleman
2025-11-12 6:54 ` [PATCH net-next v9 06/14] vsock/loopback: add netns support Bobby Eshleman
2025-11-12 14:19 ` Stefano Garzarella
2025-11-12 18:27 ` Bobby Eshleman
2025-11-13 15:24 ` Stefano Garzarella
2025-11-13 18:26 ` Bobby Eshleman
2025-11-14 9:33 ` Stefano Garzarella
2025-11-14 22:13 ` Bobby Eshleman
2025-11-17 9:27 ` Stefano Garzarella
2025-11-12 6:54 ` Bobby Eshleman [this message]
2025-11-12 6:54 ` [PATCH net-next v9 08/14] vsock: reject bad VSOCK_NET_MODE_LOCAL configuration for G2H Bobby Eshleman
2025-11-12 14:21 ` Stefano Garzarella
2025-11-12 18:36 ` Bobby Eshleman
2025-11-12 6:54 ` [PATCH net-next v9 09/14] selftests/vsock: add namespace helpers to vmtest.sh Bobby Eshleman
2025-11-12 6:54 ` [PATCH net-next v9 10/14] selftests/vsock: prepare vm management helpers for namespaces Bobby Eshleman
2025-11-12 14:23 ` Stefano Garzarella
2025-11-12 6:54 ` [PATCH net-next v9 11/14] selftests/vsock: add tests for proc sys vsock ns_mode Bobby Eshleman
2025-11-12 14:38 ` Stefano Garzarella
2025-11-12 6:54 ` [PATCH net-next v9 12/14] selftests/vsock: add namespace tests for CID collisions Bobby Eshleman
2025-11-12 6:54 ` [PATCH net-next v9 13/14] selftests/vsock: add tests for host <-> vm connectivity with namespaces Bobby Eshleman
2025-11-12 14:41 ` Stefano Garzarella
2025-11-12 6:54 ` [PATCH net-next v9 14/14] selftests/vsock: add tests for namespace deletion and mode changes Bobby Eshleman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251111-vsock-vmtest-v9-7-852787a37bed@meta.com \
--to=bobbyeshleman@gmail.com \
--cc=bcm-kernel-feedback-list@broadcom.com \
--cc=berrange@redhat.com \
--cc=bobbyeshleman@meta.com \
--cc=bryan-bt.tan@broadcom.com \
--cc=davem@davemloft.net \
--cc=decui@microsoft.com \
--cc=edumazet@google.com \
--cc=eperezma@redhat.com \
--cc=haiyangz@microsoft.com \
--cc=horms@kernel.org \
--cc=jasowang@redhat.com \
--cc=kuba@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=kys@microsoft.com \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sargun@sargun.me \
--cc=sgarzare@redhat.com \
--cc=shuah@kernel.org \
--cc=stefanha@redhat.com \
--cc=virtualization@lists.linux.dev \
--cc=vishnu.dasa@broadcom.com \
--cc=wei.liu@kernel.org \
--cc=xuanzhuo@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox