public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Cindy Lu <lulu@redhat.com>
To: lulu@redhat.com, mst@redhat.com, jasowang@redhat.com,
	kvm@vger.kernel.org, virtualization@lists.linux.dev,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC 2/3] vhost/net: add netfilter socket support
Date: Sun,  8 Feb 2026 22:32:23 +0800	[thread overview]
Message-ID: <20260208143441.2177372-3-lulu@redhat.com> (raw)
In-Reply-To: <20260208143441.2177372-1-lulu@redhat.com>

Introduce the netfilter socket plumbing and the VHOST_NET_SET_FILTER ioctl.
Initialize the netfilter state on open and release it on reset/close.

Key points:
- Add filter_sock + filter_lock to vhost_net
- Validate SOCK_SEQPACKET AF_UNIX filter socket from userspace
- Add vhost_net_set_filter() and VHOST_NET_SET_FILTER ioctl handler
- Initialize filter state on open and clean up on reset/release

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 drivers/vhost/net.c | 109 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7f886d3dba7d..f02deff0e53c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -131,6 +131,7 @@ struct vhost_net_virtqueue {
 	struct vhost_net_buf rxq;
 	/* Batched XDP buffs */
 	struct xdp_buff *xdp;
+
 };
 
 struct vhost_net {
@@ -147,6 +148,15 @@ struct vhost_net {
 	bool tx_flush;
 	/* Private page frag cache */
 	struct page_frag_cache pf_cache;
+
+	/*
+	 * Optional vhost-net filter offload socket.
+	 * When configured, RX packets can be routed through a userspace
+	 * filter chain via a SOCK_SEQPACKET control socket. Access to
+	 * filter_sock is protected by filter_lock.
+	 */
+	struct socket *filter_sock;
+	spinlock_t filter_lock;
 };
 
 static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -1128,6 +1138,95 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
 	return r;
 }
 
+/*
+ * Validate and acquire the filter socket from userspace.
+ *
+ * Returns:
+ *   - NULL when fd == -1 (explicitly disable filter)
+ *   - a ref-counted struct socket on success
+ *   - ERR_PTR(-errno) on validation failure
+ */
+static struct socket *get_filter_socket(int fd)
+{
+	int r;
+	struct socket *sock;
+
+	/* Special case: userspace asks to disable filter. */
+	if (fd == -1)
+		return NULL;
+
+	sock = sockfd_lookup(fd, &r);
+	if (!sock)
+		return ERR_PTR(-ENOTSOCK);
+
+	if (sock->sk->sk_family != AF_UNIX ||
+	    sock->sk->sk_type != SOCK_SEQPACKET) {
+		sockfd_put(sock);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return sock;
+}
+
+/*
+ * Drop the currently configured filter socket, if any.
+ *
+ * Caller does not need to hold filter_lock; this function clears the pointer
+ * under the lock and releases the socket reference afterwards.
+ */
+static void vhost_net_filter_stop(struct vhost_net *n)
+{
+	struct socket *sock = n->filter_sock;
+
+	spin_lock(&n->filter_lock);
+	n->filter_sock = NULL;
+	spin_unlock(&n->filter_lock);
+
+	if (sock)
+		sockfd_put(sock);
+}
+
+/*
+ * Install or remove a filter socket for this vhost-net device.
+ *
+ * The ioctl passes an fd for a SOCK_SEQPACKET AF_UNIX socket created by
+ * userspace. We validate the socket type, replace any existing filter socket,
+ * and keep a reference so RX path can safely send filter requests.
+ */
+static long vhost_net_set_filter(struct vhost_net *n, int fd)
+{
+	struct socket *sock;
+	int r;
+
+	mutex_lock(&n->dev.mutex);
+	r = vhost_dev_check_owner(&n->dev);
+	if (r)
+		goto out;
+
+	sock = get_filter_socket(fd);
+	if (IS_ERR(sock)) {
+		r = PTR_ERR(sock);
+		goto out;
+	}
+
+	vhost_net_filter_stop(n);
+
+	if (!sock) {
+		r = 0;
+		goto out;
+	}
+
+	spin_lock(&n->filter_lock);
+	n->filter_sock = sock;
+	spin_unlock(&n->filter_lock);
+
+	r = 0;
+
+out:
+	mutex_unlock(&n->dev.mutex);
+	return r;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_rx(struct vhost_net *net)
@@ -1383,6 +1482,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 
 	f->private_data = n;
 	page_frag_cache_init(&n->pf_cache);
+	spin_lock_init(&n->filter_lock);
+	n->filter_sock = NULL;
 
 	return 0;
 }
@@ -1433,6 +1534,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	struct socket *tx_sock;
 	struct socket *rx_sock;
 
+	vhost_net_filter_stop(n);
 	vhost_net_stop(n, &tx_sock, &rx_sock);
 	vhost_net_flush(n);
 	vhost_dev_stop(&n->dev);
@@ -1637,6 +1739,8 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
 		goto done;
+
+	vhost_net_filter_stop(n);
 	umem = vhost_dev_reset_owner_prepare();
 	if (!umem) {
 		err = -ENOMEM;
@@ -1737,6 +1841,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 	void __user *argp = (void __user *)arg;
 	u64 __user *featurep = argp;
 	struct vhost_vring_file backend;
+	struct vhost_net_filter filter;
 	u64 features, count, copied;
 	int r, i;
 
@@ -1745,6 +1850,10 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 		if (copy_from_user(&backend, argp, sizeof backend))
 			return -EFAULT;
 		return vhost_net_set_backend(n, backend.index, backend.fd);
+	case VHOST_NET_SET_FILTER:
+		if (copy_from_user(&filter, argp, sizeof(filter)))
+			return -EFAULT;
+		return vhost_net_set_filter(n, filter.fd);
 	case VHOST_GET_FEATURES:
 		features = vhost_net_features[0];
 		if (copy_to_user(featurep, &features, sizeof features))
-- 
2.52.0


  parent reply	other threads:[~2026-02-08 14:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-08 14:32 [RFC 0/3] vhost-net: netfilter support for RX path Cindy Lu
2026-02-08 14:32 ` [RFC 1/3] uapi: vhost: add vhost-net netfilter offload API Cindy Lu
2026-02-08 17:32   ` Michael S. Tsirkin
2026-02-09  6:41     ` Cindy Lu
2026-02-08 14:32 ` Cindy Lu [this message]
2026-02-08 14:32 ` [RFC 3/3] vhost/net: add RX netfilter offload path Cindy Lu
2026-02-09  1:46 ` [RFC 0/3] vhost-net: netfilter support for RX path Jason Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260208143441.2177372-3-lulu@redhat.com \
    --to=lulu@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox