kvm.vger.kernel.org archive mirror
* [PATCH net-next 0/2] in order support for vhost-net
@ 2025-07-08  6:48 Jason Wang
  2025-07-08  6:48 ` [PATCH net-next 1/2] vhost: basic in order support Jason Wang
  2025-07-08  6:48 ` [PATCH net-next 2/2] vhost_net: basic in_order support Jason Wang
  0 siblings, 2 replies; 8+ messages in thread
From: Jason Wang @ 2025-07-08  6:48 UTC (permalink / raw)
  To: mst, jasowang, eperezma
  Cc: kvm, virtualization, netdev, linux-kernel, jonah.palmer

Hi all,

This series implements VIRTIO_F_IN_ORDER support for vhost-net. When
this feature is negotiated, the driver uses descriptors in the order
in which they were made available, which lets vhost avoid reading the
available ring and batch its used ring updates.

Benchmarks show a notable improvement. Please see patch 2 for details.

Thanks

Jason Wang (2):
  vhost: basic in order support
  vhost_net: basic in_order support

 drivers/vhost/net.c   |  88 +++++++++++++++++++++---------
 drivers/vhost/vhost.c | 121 +++++++++++++++++++++++++++++++++++-------
 drivers/vhost/vhost.h |   8 ++-
 3 files changed, 170 insertions(+), 47 deletions(-)

-- 
2.31.1



* [PATCH net-next 1/2] vhost: basic in order support
  2025-07-08  6:48 [PATCH net-next 0/2] in order support for vhost-net Jason Wang
@ 2025-07-08  6:48 ` Jason Wang
  2025-07-09 16:38   ` Jonah Palmer
  2025-07-10  9:04   ` Eugenio Perez Martin
  2025-07-08  6:48 ` [PATCH net-next 2/2] vhost_net: basic in_order support Jason Wang
  1 sibling, 2 replies; 8+ messages in thread
From: Jason Wang @ 2025-07-08  6:48 UTC (permalink / raw)
  To: mst, jasowang, eperezma
  Cc: kvm, virtualization, netdev, linux-kernel, jonah.palmer

This patch adds basic in order support for vhost. Two optimizations
are implemented in this patch:

1) Since driver uses descriptor in order, vhost can deduce the next
   avail ring head by counting the number of descriptors that has been
   used in next_avail_head. This eliminate the need to access the
   available ring in vhost.

2) vhost_add_used_and_singal_n() is extended to accept the number of
   batched buffers per used elem. While this increases the times of
   usersapce memory access but it helps to reduce the chance of
   used ring access of both the driver and vhost.

Vhost-net will be the first user for this.
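
A minimal sketch of optimization 1) for illustration (not part of the
patch; it relies on the power-of-two ring size that vhost already
enforces):

	/* Without IN_ORDER, the head must be read from the avail ring:
	 *   head = vhost16_to_cpu(vq, vq->avail->ring[idx % vq->num]);
	 * With IN_ORDER, the driver uses descriptors sequentially, so
	 * the head can be deduced from a per-vq count of the descriptors
	 * consumed so far.
	 */
	head = vq->next_avail_head & (vq->num - 1);
	/* ... translate the buffer, which occupies c descriptors in the
	 * descriptor table ... */
	vq->last_avail_idx++;       /* one buffer consumed */
	vq->next_avail_head += c;   /* c descriptors consumed */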

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c   |   6 ++-
 drivers/vhost/vhost.c | 121 +++++++++++++++++++++++++++++++++++-------
 drivers/vhost/vhost.h |   8 ++-
 3 files changed, 111 insertions(+), 24 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7cbfc7d718b3..4f9c67f17b49 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -374,7 +374,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
 	while (j) {
 		add = min(UIO_MAXIOV - nvq->done_idx, j);
 		vhost_add_used_and_signal_n(vq->dev, vq,
-					    &vq->heads[nvq->done_idx], add);
+					    &vq->heads[nvq->done_idx],
+					    NULL, add);
 		nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
 		j -= add;
 	}
@@ -457,7 +458,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
 	if (!nvq->done_idx)
 		return;
 
-	vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+	vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
+				    nvq->done_idx);
 	nvq->done_idx = 0;
 }
 
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3a5ebb973dba..c7ed069fc49e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -364,6 +364,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->avail = NULL;
 	vq->used = NULL;
 	vq->last_avail_idx = 0;
+	vq->next_avail_head = 0;
 	vq->avail_idx = 0;
 	vq->last_used_idx = 0;
 	vq->signalled_used = 0;
@@ -455,6 +456,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
 	vq->log = NULL;
 	kfree(vq->heads);
 	vq->heads = NULL;
+	kfree(vq->nheads);
+	vq->nheads = NULL;
 }
 
 /* Helper to allocate iovec buffers for all vqs. */
@@ -472,7 +475,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 					GFP_KERNEL);
 		vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
 					  GFP_KERNEL);
-		if (!vq->indirect || !vq->log || !vq->heads)
+		vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
+					   GFP_KERNEL);
+		if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
 			goto err_nomem;
 	}
 	return 0;
@@ -1990,14 +1995,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 			break;
 		}
 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
-			vq->last_avail_idx = s.num & 0xffff;
+			vq->next_avail_head = vq->last_avail_idx =
+					      s.num & 0xffff;
 			vq->last_used_idx = (s.num >> 16) & 0xffff;
 		} else {
 			if (s.num > 0xffff) {
 				r = -EINVAL;
 				break;
 			}
-			vq->last_avail_idx = s.num;
+			vq->next_avail_head = vq->last_avail_idx = s.num;
 		}
 		/* Forget the cached index value. */
 		vq->avail_idx = vq->last_avail_idx;
@@ -2590,11 +2596,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num)
 {
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	struct vring_desc desc;
 	unsigned int i, head, found = 0;
 	u16 last_avail_idx = vq->last_avail_idx;
 	__virtio16 ring_head;
-	int ret, access;
+	int ret, access, c = 0;
 
 	if (vq->avail_idx == vq->last_avail_idx) {
 		ret = vhost_get_avail_idx(vq);
@@ -2605,17 +2612,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 			return vq->num;
 	}
 
-	/* Grab the next descriptor number they're advertising, and increment
-	 * the index we've seen. */
-	if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
-		vq_err(vq, "Failed to read head: idx %d address %p\n",
-		       last_avail_idx,
-		       &vq->avail->ring[last_avail_idx % vq->num]);
-		return -EFAULT;
+	if (in_order)
+		head = vq->next_avail_head & (vq->num - 1);
+	else {
+		/* Grab the next descriptor number they're
+		 * advertising, and increment the index we've seen. */
+		if (unlikely(vhost_get_avail_head(vq, &ring_head,
+						  last_avail_idx))) {
+			vq_err(vq, "Failed to read head: idx %d address %p\n",
+				last_avail_idx,
+				&vq->avail->ring[last_avail_idx % vq->num]);
+			return -EFAULT;
+		}
+		head = vhost16_to_cpu(vq, ring_head);
 	}
 
-	head = vhost16_to_cpu(vq, ring_head);
-
 	/* If their number is silly, that's an error. */
 	if (unlikely(head >= vq->num)) {
 		vq_err(vq, "Guest says index %u > %u is available",
@@ -2658,6 +2669,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 						"in indirect descriptor at idx %d\n", i);
 				return ret;
 			}
+			++c;
 			continue;
 		}
 
@@ -2693,10 +2705,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 			}
 			*out_num += ret;
 		}
+		++c;
 	} while ((i = next_desc(vq, &desc)) != -1);
 
 	/* On success, increment avail index. */
 	vq->last_avail_idx++;
+	vq->next_avail_head += c;
 
 	/* Assume notifications from guest are disabled at this point,
 	 * if they aren't we would need to update avail_event index. */
@@ -2720,8 +2734,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 		cpu_to_vhost32(vq, head),
 		cpu_to_vhost32(vq, len)
 	};
+	u16 nheads = 1;
 
-	return vhost_add_used_n(vq, &heads, 1);
+	return vhost_add_used_n(vq, &heads, &nheads, 1);
 }
 EXPORT_SYMBOL_GPL(vhost_add_used);
 
@@ -2757,10 +2772,10 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 	return 0;
 }
 
-/* After we've used one of their buffers, we tell them about it.  We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
-		     unsigned count)
+static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
+				struct vring_used_elem *heads,
+				u16 *nheads,
+				unsigned count)
 {
 	int start, n, r;
 
@@ -2775,6 +2790,70 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 	}
 	r = __vhost_add_used_n(vq, heads, count);
 
+	return r;
+}
+
+static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
+				     struct vring_used_elem *heads,
+				     u16 *nheads,
+				     unsigned count)
+{
+	vring_used_elem_t __user *used;
+	u16 old, new = vq->last_used_idx;
+	int start, i;
+
+	if (!nheads)
+		return -EINVAL;
+
+	start = vq->last_used_idx & (vq->num - 1);
+	used = vq->used->ring + start;
+
+	for (i = 0; i < count; i++) {
+		if (vhost_put_used(vq, &heads[i], start, 1)) {
+			vq_err(vq, "Failed to write used");
+			return -EFAULT;
+		}
+		start += nheads[i];
+		new += nheads[i];
+		if (start >= vq->num)
+			start -= vq->num;
+	}
+
+	if (unlikely(vq->log_used)) {
+		/* Make sure data is seen before log. */
+		smp_wmb();
+		/* Log used ring entry write. */
+		log_used(vq, ((void __user *)used - (void __user *)vq->used),
+			 (vq->num - start) * sizeof *used);
+		if (start + count > vq->num)
+			log_used(vq, 0,
+				 (start + count - vq->num) * sizeof *used);
+	}
+
+	old = vq->last_used_idx;
+	vq->last_used_idx = new;
+	/* If the driver never bothers to signal in a very long while,
+	 * used index might wrap around. If that happens, invalidate
+	 * signalled_used index we stored. TODO: make sure driver
+	 * signals at least once in 2^16 and remove this. */
+	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+		vq->signalled_used_valid = false;
+	return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     u16 *nheads, unsigned count)
+{
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+	int r;
+
+	if (!in_order || !nheads)
+		r = vhost_add_used_n_ooo(vq, heads, nheads, count);
+	else
+		r = vhost_add_used_n_in_order(vq, heads, nheads, count);
+
 	/* Make sure buffer is written before we update index. */
 	smp_wmb();
 	if (vhost_put_used_idx(vq)) {
@@ -2853,9 +2932,11 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
 /* multi-buffer version of vhost_add_used_and_signal */
 void vhost_add_used_and_signal_n(struct vhost_dev *dev,
 				 struct vhost_virtqueue *vq,
-				 struct vring_used_elem *heads, unsigned count)
+				 struct vring_used_elem *heads,
+				 u16 *nheads,
+				 unsigned count)
 {
-	vhost_add_used_n(vq, heads, count);
+	vhost_add_used_n(vq, heads, nheads, count);
 	vhost_signal(dev, vq);
 }
 EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index bb75a292d50c..dca9f309d396 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -103,6 +103,8 @@ struct vhost_virtqueue {
 	 * Values are limited to 0x7fff, and the high bit is used as
 	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_avail_idx;
+	/* Next avail ring head when VIRTIO_F_IN_ORDER is neogitated */
+	u16 next_avail_head;
 
 	/* Caches available index value from user. */
 	u16 avail_idx;
@@ -129,6 +131,7 @@ struct vhost_virtqueue {
 	struct iovec iotlb_iov[64];
 	struct iovec *indirect;
 	struct vring_used_elem *heads;
+	u16 *nheads;
 	/* Protected by virtqueue mutex. */
 	struct vhost_iotlb *umem;
 	struct vhost_iotlb *iotlb;
@@ -213,11 +216,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
 int vhost_vq_init_access(struct vhost_virtqueue *);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
 int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
-		     unsigned count);
+		     u16 *nheads, unsigned count);
 void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
 			       unsigned int id, int len);
 void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
-			       struct vring_used_elem *heads, unsigned count);
+				 struct vring_used_elem *heads, u16 *nheads,
+				 unsigned count);
 void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
 void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
-- 
2.31.1



* [PATCH net-next 2/2] vhost_net: basic in_order support
  2025-07-08  6:48 [PATCH net-next 0/2] in order support for vhost-net Jason Wang
  2025-07-08  6:48 ` [PATCH net-next 1/2] vhost: basic in order support Jason Wang
@ 2025-07-08  6:48 ` Jason Wang
  2025-07-09 16:38   ` Jonah Palmer
  2025-07-10 11:56   ` Eugenio Perez Martin
  1 sibling, 2 replies; 8+ messages in thread
From: Jason Wang @ 2025-07-08  6:48 UTC (permalink / raw)
  To: mst, jasowang, eperezma
  Cc: kvm, virtualization, netdev, linux-kernel, jonah.palmer

This patch introduces basic in-order support for vhost-net. By
recording the number of batched buffers in an array when calling
`vhost_add_used_and_signal_n()`, we can reduce the number of userspace
accesses. Note that the vhost-net batching logic is kept as we still
count the number of buffers there.
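
As an illustration only (simplified, not lifted verbatim from the
diff; last_head below stands for the head of the last packet in the
batch), the TX path now reports a whole batch with a single used
element plus a buffer count when in order is negotiated:

	if (in_order) {
		/* id of the last buffer in the batch; len is 0 for TX */
		vq->heads[0].id  = cpu_to_vhost32(vq, last_head);
		vq->heads[0].len = 0;
		/* number of buffers this single used elem stands for */
		vq->nheads[0]    = nvq->done_idx;
	}
	vhost_add_used_and_signal_n(dev, vq, vq->heads, vq->nheads,
				    in_order ? 1 : nvq->done_idx);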

Testing Results:

With testpmd:

- TX: txonly mode + vhost_net with XDP_DROP on TAP shows a 17.5%
  improvement, from 4.75 Mpps to 5.35 Mpps.
- RX: No obvious improvements were observed.

With virtio-ring in-order experimental code in the guest:

- TX: pktgen in the guest + XDP_DROP on  TAP shows a 19% improvement,
  from 5.2 Mpps to 6.2 Mpps.
- RX: pktgen on TAP with vhost_net + XDP_DROP in the guest achieves a
  6.1% improvement, from 3.47 Mpps to 3.61 Mpps.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 86 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 25 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 4f9c67f17b49..8ac994b3228a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -74,7 +74,8 @@ enum {
 			 (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
 			 (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
 			 (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
-			 (1ULL << VIRTIO_F_RING_RESET)
+			 (1ULL << VIRTIO_F_RING_RESET) |
+			 (1ULL << VIRTIO_F_IN_ORDER)
 };
 
 enum {
@@ -450,7 +451,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
+				  unsigned int count)
 {
 	struct vhost_virtqueue *vq = &nvq->vq;
 	struct vhost_dev *dev = vq->dev;
@@ -458,8 +460,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
 	if (!nvq->done_idx)
 		return;
 
-	vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
-				    nvq->done_idx);
+	vhost_add_used_and_signal_n(dev, vq, vq->heads,
+				    vq->nheads, count);
 	nvq->done_idx = 0;
 }
 
@@ -468,6 +470,8 @@ static void vhost_tx_batch(struct vhost_net *net,
 			   struct socket *sock,
 			   struct msghdr *msghdr)
 {
+	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	struct tun_msg_ctl ctl = {
 		.type = TUN_MSG_PTR,
 		.num = nvq->batched_xdp,
@@ -475,6 +479,11 @@ static void vhost_tx_batch(struct vhost_net *net,
 	};
 	int i, err;
 
+	if (in_order) {
+		vq->heads[0].len = 0;
+		vq->nheads[0] = nvq->done_idx;
+	}
+
 	if (nvq->batched_xdp == 0)
 		goto signal_used;
 
@@ -496,7 +505,7 @@ static void vhost_tx_batch(struct vhost_net *net,
 	}
 
 signal_used:
-	vhost_net_signal_used(nvq);
+	vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
 	nvq->batched_xdp = 0;
 }
 
@@ -758,6 +767,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 	int sent_pkts = 0;
 	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
 	bool busyloop_intr;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 
 	do {
 		busyloop_intr = false;
@@ -794,11 +804,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 				break;
 			}
 
-			/* We can't build XDP buff, go for single
-			 * packet path but let's flush batched
-			 * packets.
-			 */
-			vhost_tx_batch(net, nvq, sock, &msg);
+			if (nvq->batched_xdp) {
+				/* We can't build XDP buff, go for single
+				 * packet path but let's flush batched
+				 * packets.
+				 */
+				vhost_tx_batch(net, nvq, sock, &msg);
+			}
 			msg.msg_control = NULL;
 		} else {
 			if (tx_can_batch(vq, total_len))
@@ -819,8 +831,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 			pr_debug("Truncated TX packet: len %d != %zd\n",
 				 err, len);
 done:
-		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
-		vq->heads[nvq->done_idx].len = 0;
+		if (in_order) {
+			vq->heads[0].id = cpu_to_vhost32(vq, head);
+		} else {
+			vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+			vq->heads[nvq->done_idx].len = 0;
+		}
 		++nvq->done_idx;
 	} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
 
@@ -999,7 +1015,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 }
 
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
-				      bool *busyloop_intr)
+				      bool *busyloop_intr, unsigned int count)
 {
 	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1009,7 +1025,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 
 	if (!len && rvq->busyloop_timeout) {
 		/* Flush batched heads first */
-		vhost_net_signal_used(rnvq);
+		vhost_net_signal_used(rnvq, count);
 		/* Both tx vq and rx socket were polled here */
 		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
 
@@ -1021,7 +1037,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 
 /* This is a multi-buffer version of vhost_get_desc, that works if
  *	vq has read descriptors only.
- * @vq		- the relevant virtqueue
+ * @nvq		- the relevant vhost_net virtqueue
  * @datalen	- data length we'll be reading
  * @iovcount	- returned count of io vectors we fill
  * @log		- vhost log
@@ -1029,14 +1045,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
  * @quota       - headcount quota, 1 for big buffer
  *	returns number of buffer heads allocated, negative on error
  */
-static int get_rx_bufs(struct vhost_virtqueue *vq,
+static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
 		       struct vring_used_elem *heads,
+		       u16 *nheads,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
 		       unsigned *log_num,
 		       unsigned int quota)
 {
+	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	unsigned int out, in;
 	int seg = 0;
 	int headcount = 0;
@@ -1073,14 +1092,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 			nlogs += *log_num;
 			log += *log_num;
 		}
-		heads[headcount].id = cpu_to_vhost32(vq, d);
 		len = iov_length(vq->iov + seg, in);
-		heads[headcount].len = cpu_to_vhost32(vq, len);
-		datalen -= len;
+		if (!in_order) {
+			heads[headcount].id = cpu_to_vhost32(vq, d);
+			heads[headcount].len = cpu_to_vhost32(vq, len);
+		}
 		++headcount;
+		datalen -= len;
 		seg += in;
 	}
-	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+
 	*iovcount = seg;
 	if (unlikely(log))
 		*log_num = nlogs;
@@ -1090,6 +1111,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		r = UIO_MAXIOV + 1;
 		goto err;
 	}
+
+	if (!in_order)
+		heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+	else {
+		heads[0].len = cpu_to_vhost32(vq, len + datalen);
+		heads[0].id = cpu_to_vhost32(vq, d);
+		nheads[0] = headcount;
+	}
+
 	return headcount;
 err:
 	vhost_discard_vq_desc(vq, headcount);
@@ -1102,6 +1132,8 @@ static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+	unsigned int count = 0;
 	unsigned in, log;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
@@ -1149,12 +1181,13 @@ static void handle_rx(struct vhost_net *net)
 
 	do {
 		sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
-						      &busyloop_intr);
+						      &busyloop_intr, count);
 		if (!sock_len)
 			break;
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
-		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+		headcount = get_rx_bufs(nvq, vq->heads + count,
+					vq->nheads + count,
 					vhost_len, &in, vq_log, &log,
 					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
@@ -1230,8 +1263,11 @@ static void handle_rx(struct vhost_net *net)
 			goto out;
 		}
 		nvq->done_idx += headcount;
-		if (nvq->done_idx > VHOST_NET_BATCH)
-			vhost_net_signal_used(nvq);
+		count += in_order ? 1 : headcount;
+		if (nvq->done_idx > VHOST_NET_BATCH) {
+			vhost_net_signal_used(nvq, count);
+			count = 0;
+		}
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, vhost_len,
 					vq->iov, in);
@@ -1243,7 +1279,7 @@ static void handle_rx(struct vhost_net *net)
 	else if (!sock_len)
 		vhost_net_enable_vq(net, vq);
 out:
-	vhost_net_signal_used(nvq);
+	vhost_net_signal_used(nvq, count);
 	mutex_unlock(&vq->mutex);
 }
 
-- 
2.31.1



* Re: [PATCH net-next 1/2] vhost: basic in order support
  2025-07-08  6:48 ` [PATCH net-next 1/2] vhost: basic in order support Jason Wang
@ 2025-07-09 16:38   ` Jonah Palmer
  2025-07-10  9:04   ` Eugenio Perez Martin
  1 sibling, 0 replies; 8+ messages in thread
From: Jonah Palmer @ 2025-07-09 16:38 UTC (permalink / raw)
  To: Jason Wang, mst, eperezma; +Cc: kvm, virtualization, netdev, linux-kernel



On 7/8/25 2:48 AM, Jason Wang wrote:
> This patch adds basic in order support for vhost. Two optimizations
> are implemented in this patch:
> 
> 1) Since driver uses descriptor in order, vhost can deduce the next
>     avail ring head by counting the number of descriptors that has been
>     used in next_avail_head. This eliminate the need to access the
>     available ring in vhost.
> 
> 2) vhost_add_used_and_singal_n() is extended to accept the number of
>     batched buffers per used elem. While this increases the times of
>     usersapce memory access but it helps to reduce the chance of
>     used ring access of both the driver and vhost.
> 
> Vhost-net will be the first user for this.

Acked-by: Jonah Palmer <jonah.palmer@oracle.com>

> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>   drivers/vhost/net.c   |   6 ++-
>   drivers/vhost/vhost.c | 121 +++++++++++++++++++++++++++++++++++-------
>   drivers/vhost/vhost.h |   8 ++-
>   3 files changed, 111 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 7cbfc7d718b3..4f9c67f17b49 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -374,7 +374,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
>   	while (j) {
>   		add = min(UIO_MAXIOV - nvq->done_idx, j);
>   		vhost_add_used_and_signal_n(vq->dev, vq,
> -					    &vq->heads[nvq->done_idx], add);
> +					    &vq->heads[nvq->done_idx],
> +					    NULL, add);
>   		nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
>   		j -= add;
>   	}
> @@ -457,7 +458,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
>   	if (!nvq->done_idx)
>   		return;
>   
> -	vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
> +	vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> +				    nvq->done_idx);
>   	nvq->done_idx = 0;
>   }
>   
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 3a5ebb973dba..c7ed069fc49e 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -364,6 +364,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>   	vq->avail = NULL;
>   	vq->used = NULL;
>   	vq->last_avail_idx = 0;
> +	vq->next_avail_head = 0;
>   	vq->avail_idx = 0;
>   	vq->last_used_idx = 0;
>   	vq->signalled_used = 0;
> @@ -455,6 +456,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
>   	vq->log = NULL;
>   	kfree(vq->heads);
>   	vq->heads = NULL;
> +	kfree(vq->nheads);
> +	vq->nheads = NULL;
>   }
>   
>   /* Helper to allocate iovec buffers for all vqs. */
> @@ -472,7 +475,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>   					GFP_KERNEL);
>   		vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
>   					  GFP_KERNEL);
> -		if (!vq->indirect || !vq->log || !vq->heads)
> +		vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
> +					   GFP_KERNEL);
> +		if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
>   			goto err_nomem;
>   	}
>   	return 0;
> @@ -1990,14 +1995,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>   			break;
>   		}
>   		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
> -			vq->last_avail_idx = s.num & 0xffff;
> +			vq->next_avail_head = vq->last_avail_idx =
> +					      s.num & 0xffff;
>   			vq->last_used_idx = (s.num >> 16) & 0xffff;
>   		} else {
>   			if (s.num > 0xffff) {
>   				r = -EINVAL;
>   				break;
>   			}
> -			vq->last_avail_idx = s.num;
> +			vq->next_avail_head = vq->last_avail_idx = s.num;
>   		}
>   		/* Forget the cached index value. */
>   		vq->avail_idx = vq->last_avail_idx;
> @@ -2590,11 +2596,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>   		      unsigned int *out_num, unsigned int *in_num,
>   		      struct vhost_log *log, unsigned int *log_num)
>   {
> +	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>   	struct vring_desc desc;
>   	unsigned int i, head, found = 0;
>   	u16 last_avail_idx = vq->last_avail_idx;
>   	__virtio16 ring_head;
> -	int ret, access;
> +	int ret, access, c = 0;
>   
>   	if (vq->avail_idx == vq->last_avail_idx) {
>   		ret = vhost_get_avail_idx(vq);
> @@ -2605,17 +2612,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>   			return vq->num;
>   	}
>   
> -	/* Grab the next descriptor number they're advertising, and increment
> -	 * the index we've seen. */
> -	if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
> -		vq_err(vq, "Failed to read head: idx %d address %p\n",
> -		       last_avail_idx,
> -		       &vq->avail->ring[last_avail_idx % vq->num]);
> -		return -EFAULT;
> +	if (in_order)
> +		head = vq->next_avail_head & (vq->num - 1);
> +	else {
> +		/* Grab the next descriptor number they're
> +		 * advertising, and increment the index we've seen. */
> +		if (unlikely(vhost_get_avail_head(vq, &ring_head,
> +						  last_avail_idx))) {
> +			vq_err(vq, "Failed to read head: idx %d address %p\n",
> +				last_avail_idx,
> +				&vq->avail->ring[last_avail_idx % vq->num]);
> +			return -EFAULT;
> +		}
> +		head = vhost16_to_cpu(vq, ring_head);
>   	}
>   
> -	head = vhost16_to_cpu(vq, ring_head);
> -
>   	/* If their number is silly, that's an error. */
>   	if (unlikely(head >= vq->num)) {
>   		vq_err(vq, "Guest says index %u > %u is available",
> @@ -2658,6 +2669,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>   						"in indirect descriptor at idx %d\n", i);
>   				return ret;
>   			}
> +			++c;
>   			continue;
>   		}
>   
> @@ -2693,10 +2705,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>   			}
>   			*out_num += ret;
>   		}
> +		++c;
>   	} while ((i = next_desc(vq, &desc)) != -1);
>   
>   	/* On success, increment avail index. */
>   	vq->last_avail_idx++;
> +	vq->next_avail_head += c;
>   
>   	/* Assume notifications from guest are disabled at this point,
>   	 * if they aren't we would need to update avail_event index. */
> @@ -2720,8 +2734,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
>   		cpu_to_vhost32(vq, head),
>   		cpu_to_vhost32(vq, len)
>   	};
> +	u16 nheads = 1;
>   
> -	return vhost_add_used_n(vq, &heads, 1);
> +	return vhost_add_used_n(vq, &heads, &nheads, 1);
>   }
>   EXPORT_SYMBOL_GPL(vhost_add_used);
>   
> @@ -2757,10 +2772,10 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>   	return 0;
>   }
>   
> -/* After we've used one of their buffers, we tell them about it.  We'll then
> - * want to notify the guest, using eventfd. */
> -int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> -		     unsigned count)
> +static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
> +				struct vring_used_elem *heads,
> +				u16 *nheads,
> +				unsigned count)
>   {
>   	int start, n, r;
>   
> @@ -2775,6 +2790,70 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>   	}
>   	r = __vhost_add_used_n(vq, heads, count);
>   
> +	return r;
> +}
> +
> +static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
> +				     struct vring_used_elem *heads,
> +				     u16 *nheads,
> +				     unsigned count)
> +{
> +	vring_used_elem_t __user *used;
> +	u16 old, new = vq->last_used_idx;
> +	int start, i;
> +
> +	if (!nheads)
> +		return -EINVAL;
> +
> +	start = vq->last_used_idx & (vq->num - 1);
> +	used = vq->used->ring + start;
> +
> +	for (i = 0; i < count; i++) {
> +		if (vhost_put_used(vq, &heads[i], start, 1)) {
> +			vq_err(vq, "Failed to write used");
> +			return -EFAULT;
> +		}
> +		start += nheads[i];
> +		new += nheads[i];
> +		if (start >= vq->num)
> +			start -= vq->num;
> +	}
> +
> +	if (unlikely(vq->log_used)) {
> +		/* Make sure data is seen before log. */
> +		smp_wmb();
> +		/* Log used ring entry write. */
> +		log_used(vq, ((void __user *)used - (void __user *)vq->used),
> +			 (vq->num - start) * sizeof *used);
> +		if (start + count > vq->num)
> +			log_used(vq, 0,
> +				 (start + count - vq->num) * sizeof *used);
> +	}
> +
> +	old = vq->last_used_idx;
> +	vq->last_used_idx = new;
> +	/* If the driver never bothers to signal in a very long while,
> +	 * used index might wrap around. If that happens, invalidate
> +	 * signalled_used index we stored. TODO: make sure driver
> +	 * signals at least once in 2^16 and remove this. */
> +	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
> +		vq->signalled_used_valid = false;
> +	return 0;
> +}
> +
> +/* After we've used one of their buffers, we tell them about it.  We'll then
> + * want to notify the guest, using eventfd. */
> +int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> +		     u16 *nheads, unsigned count)
> +{
> +	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> +	int r;
> +
> +	if (!in_order || !nheads)
> +		r = vhost_add_used_n_ooo(vq, heads, nheads, count);
> +	else
> +		r = vhost_add_used_n_in_order(vq, heads, nheads, count);
> +
>   	/* Make sure buffer is written before we update index. */
>   	smp_wmb();
>   	if (vhost_put_used_idx(vq)) {
> @@ -2853,9 +2932,11 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
>   /* multi-buffer version of vhost_add_used_and_signal */
>   void vhost_add_used_and_signal_n(struct vhost_dev *dev,
>   				 struct vhost_virtqueue *vq,
> -				 struct vring_used_elem *heads, unsigned count)
> +				 struct vring_used_elem *heads,
> +				 u16 *nheads,
> +				 unsigned count)
>   {
> -	vhost_add_used_n(vq, heads, count);
> +	vhost_add_used_n(vq, heads, nheads, count);
>   	vhost_signal(dev, vq);
>   }
>   EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index bb75a292d50c..dca9f309d396 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -103,6 +103,8 @@ struct vhost_virtqueue {
>   	 * Values are limited to 0x7fff, and the high bit is used as
>   	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
>   	u16 last_avail_idx;
> +	/* Next avail ring head when VIRTIO_F_IN_ORDER is neogitated */
> +	u16 next_avail_head;
>   
>   	/* Caches available index value from user. */
>   	u16 avail_idx;
> @@ -129,6 +131,7 @@ struct vhost_virtqueue {
>   	struct iovec iotlb_iov[64];
>   	struct iovec *indirect;
>   	struct vring_used_elem *heads;
> +	u16 *nheads;
>   	/* Protected by virtqueue mutex. */
>   	struct vhost_iotlb *umem;
>   	struct vhost_iotlb *iotlb;
> @@ -213,11 +216,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
>   int vhost_vq_init_access(struct vhost_virtqueue *);
>   int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
>   int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
> -		     unsigned count);
> +		     u16 *nheads, unsigned count);
>   void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
>   			       unsigned int id, int len);
>   void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
> -			       struct vring_used_elem *heads, unsigned count);
> +				 struct vring_used_elem *heads, u16 *nheads,
> +				 unsigned count);
>   void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
>   void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
>   bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);



* Re: [PATCH net-next 2/2] vhost_net: basic in_order support
  2025-07-08  6:48 ` [PATCH net-next 2/2] vhost_net: basic in_order support Jason Wang
@ 2025-07-09 16:38   ` Jonah Palmer
  2025-07-10 11:56   ` Eugenio Perez Martin
  1 sibling, 0 replies; 8+ messages in thread
From: Jonah Palmer @ 2025-07-09 16:38 UTC (permalink / raw)
  To: Jason Wang, mst, eperezma; +Cc: kvm, virtualization, netdev, linux-kernel



On 7/8/25 2:48 AM, Jason Wang wrote:
> This patch introduces basic in-order support for vhost-net. By
> recording the number of batched buffers in an array when calling
> `vhost_add_used_and_signal_n()`, we can reduce the number of userspace
> accesses. Note that the vhost-net batching logic is kept as we still
> count the number of buffers there.
> 
> Testing Results:
> 
> With testpmd:
> 
> - TX: txonly mode + vhost_net with XDP_DROP on TAP shows a 17.5%
>    improvement, from 4.75 Mpps to 5.35 Mpps.
> - RX: No obvious improvements were observed.
> 
> With virtio-ring in-order experimental code in the guest:
> 
> - TX: pktgen in the guest + XDP_DROP on  TAP shows a 19% improvement,
>    from 5.2 Mpps to 6.2 Mpps.
> - RX: pktgen on TAP with vhost_net + XDP_DROP in the guest achieves a
>    6.1% improvement, from 3.47 Mpps to 3.61 Mpps.

Acked-by: Jonah Palmer <jonah.palmer@oracle.com>

> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>   drivers/vhost/net.c | 86 ++++++++++++++++++++++++++++++++-------------
>   1 file changed, 61 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4f9c67f17b49..8ac994b3228a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -74,7 +74,8 @@ enum {
>   			 (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
>   			 (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
>   			 (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> -			 (1ULL << VIRTIO_F_RING_RESET)
> +			 (1ULL << VIRTIO_F_RING_RESET) |
> +			 (1ULL << VIRTIO_F_IN_ORDER)
>   };
>   
>   enum {
> @@ -450,7 +451,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
>   	return vhost_poll_start(poll, sock->file);
>   }
>   
> -static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
> +static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
> +				  unsigned int count)
>   {
>   	struct vhost_virtqueue *vq = &nvq->vq;
>   	struct vhost_dev *dev = vq->dev;
> @@ -458,8 +460,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
>   	if (!nvq->done_idx)
>   		return;
>   
> -	vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> -				    nvq->done_idx);
> +	vhost_add_used_and_signal_n(dev, vq, vq->heads,
> +				    vq->nheads, count);
>   	nvq->done_idx = 0;
>   }
>   
> @@ -468,6 +470,8 @@ static void vhost_tx_batch(struct vhost_net *net,
>   			   struct socket *sock,
>   			   struct msghdr *msghdr)
>   {
> +	struct vhost_virtqueue *vq = &nvq->vq;
> +	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>   	struct tun_msg_ctl ctl = {
>   		.type = TUN_MSG_PTR,
>   		.num = nvq->batched_xdp,
> @@ -475,6 +479,11 @@ static void vhost_tx_batch(struct vhost_net *net,
>   	};
>   	int i, err;
>   
> +	if (in_order) {
> +		vq->heads[0].len = 0;
> +		vq->nheads[0] = nvq->done_idx;
> +	}
> +
>   	if (nvq->batched_xdp == 0)
>   		goto signal_used;
>   
> @@ -496,7 +505,7 @@ static void vhost_tx_batch(struct vhost_net *net,
>   	}
>   
>   signal_used:
> -	vhost_net_signal_used(nvq);
> +	vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
>   	nvq->batched_xdp = 0;
>   }
>   
> @@ -758,6 +767,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>   	int sent_pkts = 0;
>   	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
>   	bool busyloop_intr;
> +	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>   
>   	do {
>   		busyloop_intr = false;
> @@ -794,11 +804,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>   				break;
>   			}
>   
> -			/* We can't build XDP buff, go for single
> -			 * packet path but let's flush batched
> -			 * packets.
> -			 */
> -			vhost_tx_batch(net, nvq, sock, &msg);
> +			if (nvq->batched_xdp) {
> +				/* We can't build XDP buff, go for single
> +				 * packet path but let's flush batched
> +				 * packets.
> +				 */
> +				vhost_tx_batch(net, nvq, sock, &msg);
> +			}
>   			msg.msg_control = NULL;
>   		} else {
>   			if (tx_can_batch(vq, total_len))
> @@ -819,8 +831,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>   			pr_debug("Truncated TX packet: len %d != %zd\n",
>   				 err, len);
>   done:
> -		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> -		vq->heads[nvq->done_idx].len = 0;
> +		if (in_order) {
> +			vq->heads[0].id = cpu_to_vhost32(vq, head);
> +		} else {
> +			vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> +			vq->heads[nvq->done_idx].len = 0;
> +		}
>   		++nvq->done_idx;
>   	} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
>   
> @@ -999,7 +1015,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
>   }
>   
>   static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
> -				      bool *busyloop_intr)
> +				      bool *busyloop_intr, unsigned int count)
>   {
>   	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
>   	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
> @@ -1009,7 +1025,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>   
>   	if (!len && rvq->busyloop_timeout) {
>   		/* Flush batched heads first */
> -		vhost_net_signal_used(rnvq);
> +		vhost_net_signal_used(rnvq, count);
>   		/* Both tx vq and rx socket were polled here */
>   		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
>   
> @@ -1021,7 +1037,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>   
>   /* This is a multi-buffer version of vhost_get_desc, that works if
>    *	vq has read descriptors only.
> - * @vq		- the relevant virtqueue
> + * @nvq		- the relevant vhost_net virtqueue
>    * @datalen	- data length we'll be reading
>    * @iovcount	- returned count of io vectors we fill
>    * @log		- vhost log
> @@ -1029,14 +1045,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>    * @quota       - headcount quota, 1 for big buffer
>    *	returns number of buffer heads allocated, negative on error
>    */
> -static int get_rx_bufs(struct vhost_virtqueue *vq,
> +static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
>   		       struct vring_used_elem *heads,
> +		       u16 *nheads,
>   		       int datalen,
>   		       unsigned *iovcount,
>   		       struct vhost_log *log,
>   		       unsigned *log_num,
>   		       unsigned int quota)
>   {
> +	struct vhost_virtqueue *vq = &nvq->vq;
> +	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>   	unsigned int out, in;
>   	int seg = 0;
>   	int headcount = 0;
> @@ -1073,14 +1092,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>   			nlogs += *log_num;
>   			log += *log_num;
>   		}
> -		heads[headcount].id = cpu_to_vhost32(vq, d);
>   		len = iov_length(vq->iov + seg, in);
> -		heads[headcount].len = cpu_to_vhost32(vq, len);
> -		datalen -= len;
> +		if (!in_order) {
> +			heads[headcount].id = cpu_to_vhost32(vq, d);
> +			heads[headcount].len = cpu_to_vhost32(vq, len);
> +		}
>   		++headcount;
> +		datalen -= len;
>   		seg += in;
>   	}
> -	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +
>   	*iovcount = seg;
>   	if (unlikely(log))
>   		*log_num = nlogs;
> @@ -1090,6 +1111,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>   		r = UIO_MAXIOV + 1;
>   		goto err;
>   	}
> +
> +	if (!in_order)
> +		heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +	else {
> +		heads[0].len = cpu_to_vhost32(vq, len + datalen);
> +		heads[0].id = cpu_to_vhost32(vq, d);
> +		nheads[0] = headcount;
> +	}
> +
>   	return headcount;
>   err:
>   	vhost_discard_vq_desc(vq, headcount);
> @@ -1102,6 +1132,8 @@ static void handle_rx(struct vhost_net *net)
>   {
>   	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
>   	struct vhost_virtqueue *vq = &nvq->vq;
> +	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> +	unsigned int count = 0;
>   	unsigned in, log;
>   	struct vhost_log *vq_log;
>   	struct msghdr msg = {
> @@ -1149,12 +1181,13 @@ static void handle_rx(struct vhost_net *net)
>   
>   	do {
>   		sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
> -						      &busyloop_intr);
> +						      &busyloop_intr, count);
>   		if (!sock_len)
>   			break;
>   		sock_len += sock_hlen;
>   		vhost_len = sock_len + vhost_hlen;
> -		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
> +		headcount = get_rx_bufs(nvq, vq->heads + count,
> +					vq->nheads + count,
>   					vhost_len, &in, vq_log, &log,
>   					likely(mergeable) ? UIO_MAXIOV : 1);
>   		/* On error, stop handling until the next kick. */
> @@ -1230,8 +1263,11 @@ static void handle_rx(struct vhost_net *net)
>   			goto out;
>   		}
>   		nvq->done_idx += headcount;
> -		if (nvq->done_idx > VHOST_NET_BATCH)
> -			vhost_net_signal_used(nvq);
> +		count += in_order ? 1 : headcount;
> +		if (nvq->done_idx > VHOST_NET_BATCH) {
> +			vhost_net_signal_used(nvq, count);
> +			count = 0;
> +		}
>   		if (unlikely(vq_log))
>   			vhost_log_write(vq, vq_log, log, vhost_len,
>   					vq->iov, in);
> @@ -1243,7 +1279,7 @@ static void handle_rx(struct vhost_net *net)
>   	else if (!sock_len)
>   		vhost_net_enable_vq(net, vq);
>   out:
> -	vhost_net_signal_used(nvq);
> +	vhost_net_signal_used(nvq, count);
>   	mutex_unlock(&vq->mutex);
>   }
>   



* Re: [PATCH net-next 1/2] vhost: basic in order support
  2025-07-08  6:48 ` [PATCH net-next 1/2] vhost: basic in order support Jason Wang
  2025-07-09 16:38   ` Jonah Palmer
@ 2025-07-10  9:04   ` Eugenio Perez Martin
  2025-07-11  1:44     ` Jason Wang
  1 sibling, 1 reply; 8+ messages in thread
From: Eugenio Perez Martin @ 2025-07-10  9:04 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, kvm, virtualization, netdev, linux-kernel, jonah.palmer

On Tue, Jul 8, 2025 at 8:48 AM Jason Wang <jasowang@redhat.com> wrote:
>
> This patch adds basic in order support for vhost. Two optimizations
> are implemented in this patch:
>
> 1) Since driver uses descriptor in order, vhost can deduce the next
>    avail ring head by counting the number of descriptors that has been
>    used in next_avail_head. This eliminate the need to access the
>    available ring in vhost.
>
> 2) vhost_add_used_and_singal_n() is extended to accept the number of
>    batched buffers per used elem. While this increases the times of
>    usersapce memory access but it helps to reduce the chance of

s/usersapce/userspace/

>    used ring access of both the driver and vhost.
>
> Vhost-net will be the first user for this.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/vhost/net.c   |   6 ++-
>  drivers/vhost/vhost.c | 121 +++++++++++++++++++++++++++++++++++-------
>  drivers/vhost/vhost.h |   8 ++-
>  3 files changed, 111 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 7cbfc7d718b3..4f9c67f17b49 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -374,7 +374,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
>         while (j) {
>                 add = min(UIO_MAXIOV - nvq->done_idx, j);
>                 vhost_add_used_and_signal_n(vq->dev, vq,
> -                                           &vq->heads[nvq->done_idx], add);
> +                                           &vq->heads[nvq->done_idx],
> +                                           NULL, add);
>                 nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
>                 j -= add;
>         }
> @@ -457,7 +458,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
>         if (!nvq->done_idx)
>                 return;
>
> -       vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
> +       vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> +                                   nvq->done_idx);
>         nvq->done_idx = 0;
>  }
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 3a5ebb973dba..c7ed069fc49e 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -364,6 +364,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>         vq->avail = NULL;
>         vq->used = NULL;
>         vq->last_avail_idx = 0;
> +       vq->next_avail_head = 0;
>         vq->avail_idx = 0;
>         vq->last_used_idx = 0;
>         vq->signalled_used = 0;
> @@ -455,6 +456,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
>         vq->log = NULL;
>         kfree(vq->heads);
>         vq->heads = NULL;
> +       kfree(vq->nheads);
> +       vq->nheads = NULL;
>  }
>
>  /* Helper to allocate iovec buffers for all vqs. */
> @@ -472,7 +475,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>                                         GFP_KERNEL);
>                 vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
>                                           GFP_KERNEL);
> -               if (!vq->indirect || !vq->log || !vq->heads)
> +               vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
> +                                          GFP_KERNEL);
> +               if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
>                         goto err_nomem;
>         }
>         return 0;
> @@ -1990,14 +1995,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>                         break;
>                 }
>                 if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
> -                       vq->last_avail_idx = s.num & 0xffff;
> +                       vq->next_avail_head = vq->last_avail_idx =
> +                                             s.num & 0xffff;
>                         vq->last_used_idx = (s.num >> 16) & 0xffff;
>                 } else {
>                         if (s.num > 0xffff) {
>                                 r = -EINVAL;
>                                 break;
>                         }
> -                       vq->last_avail_idx = s.num;
> +                       vq->next_avail_head = vq->last_avail_idx = s.num;

Why not just reuse last_avail_idx instead of creating next_avail_head?

At first glance it seemed to me that it was done this way to support
rewinding, but in_order path will happily reuse next_avail_head
without checking for last_avail_idx except for checking if the ring is
empty. Am I missing something?

>                 }
>                 /* Forget the cached index value. */
>                 vq->avail_idx = vq->last_avail_idx;
> @@ -2590,11 +2596,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>                       unsigned int *out_num, unsigned int *in_num,
>                       struct vhost_log *log, unsigned int *log_num)
>  {
> +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>         struct vring_desc desc;
>         unsigned int i, head, found = 0;
>         u16 last_avail_idx = vq->last_avail_idx;
>         __virtio16 ring_head;
> -       int ret, access;
> +       int ret, access, c = 0;
>
>         if (vq->avail_idx == vq->last_avail_idx) {
>                 ret = vhost_get_avail_idx(vq);
> @@ -2605,17 +2612,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>                         return vq->num;
>         }
>
> -       /* Grab the next descriptor number they're advertising, and increment
> -        * the index we've seen. */
> -       if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
> -               vq_err(vq, "Failed to read head: idx %d address %p\n",
> -                      last_avail_idx,
> -                      &vq->avail->ring[last_avail_idx % vq->num]);
> -               return -EFAULT;
> +       if (in_order)
> +               head = vq->next_avail_head & (vq->num - 1);
> +       else {
> +               /* Grab the next descriptor number they're
> +                * advertising, and increment the index we've seen. */
> +               if (unlikely(vhost_get_avail_head(vq, &ring_head,
> +                                                 last_avail_idx))) {
> +                       vq_err(vq, "Failed to read head: idx %d address %p\n",
> +                               last_avail_idx,
> +                               &vq->avail->ring[last_avail_idx % vq->num]);
> +                       return -EFAULT;
> +               }
> +               head = vhost16_to_cpu(vq, ring_head);
>         }
>
> -       head = vhost16_to_cpu(vq, ring_head);
> -
>         /* If their number is silly, that's an error. */
>         if (unlikely(head >= vq->num)) {
>                 vq_err(vq, "Guest says index %u > %u is available",
> @@ -2658,6 +2669,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>                                                 "in indirect descriptor at idx %d\n", i);
>                                 return ret;
>                         }
> +                       ++c;
>                         continue;
>                 }
>
> @@ -2693,10 +2705,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
>                         }
>                         *out_num += ret;
>                 }
> +               ++c;
>         } while ((i = next_desc(vq, &desc)) != -1);
>
>         /* On success, increment avail index. */
>         vq->last_avail_idx++;
> +       vq->next_avail_head += c;
>
>         /* Assume notifications from guest are disabled at this point,
>          * if they aren't we would need to update avail_event index. */
> @@ -2720,8 +2734,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
>                 cpu_to_vhost32(vq, head),
>                 cpu_to_vhost32(vq, len)
>         };
> +       u16 nheads = 1;
>
> -       return vhost_add_used_n(vq, &heads, 1);
> +       return vhost_add_used_n(vq, &heads, &nheads, 1);
>  }
>  EXPORT_SYMBOL_GPL(vhost_add_used);
>
> @@ -2757,10 +2772,10 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>         return 0;
>  }
>
> -/* After we've used one of their buffers, we tell them about it.  We'll then
> - * want to notify the guest, using eventfd. */
> -int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> -                    unsigned count)
> +static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
> +                               struct vring_used_elem *heads,
> +                               u16 *nheads,
> +                               unsigned count)

nheads is not used in this function and it is checked to be NULL in
the caller, should we remove it from the parameter list?
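For example, the signature could then simply be (sketch):

	static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
					struct vring_used_elem *heads,
					unsigned count);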

>  {
>         int start, n, r;
>
> @@ -2775,6 +2790,70 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>         }
>         r = __vhost_add_used_n(vq, heads, count);
>
> +       return r;

Nit: We can merge with the previous statement and do "return
__vhost_add_used_n(vq, heads, count);"

> +}
> +
> +static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
> +                                    struct vring_used_elem *heads,
> +                                    u16 *nheads,

Nit: we can const-ify nheads, and do the same for _in_order variant
and vhost_add_used_n. Actually we can do it with heads too but it
requires more changes to existing code. I think it would be nice to
constify *nheads if you need to respin.
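For instance (sketch only):

	int vhost_add_used_n(struct vhost_virtqueue *vq,
			     struct vring_used_elem *heads,
			     const u16 *nheads, unsigned count);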

> +                                    unsigned count)
> +{
> +       vring_used_elem_t __user *used;
> +       u16 old, new = vq->last_used_idx;
> +       int start, i;
> +
> +       if (!nheads)
> +               return -EINVAL;
> +
> +       start = vq->last_used_idx & (vq->num - 1);
> +       used = vq->used->ring + start;
> +
> +       for (i = 0; i < count; i++) {
> +               if (vhost_put_used(vq, &heads[i], start, 1)) {
> +                       vq_err(vq, "Failed to write used");
> +                       return -EFAULT;
> +               }
> +               start += nheads[i];
> +               new += nheads[i];
> +               if (start >= vq->num)
> +                       start -= vq->num;
> +       }
> +
> +       if (unlikely(vq->log_used)) {
> +               /* Make sure data is seen before log. */
> +               smp_wmb();
> +               /* Log used ring entry write. */
> +               log_used(vq, ((void __user *)used - (void __user *)vq->used),
> +                        (vq->num - start) * sizeof *used);
> +               if (start + count > vq->num)
> +                       log_used(vq, 0,
> +                                (start + count - vq->num) * sizeof *used);
> +       }
> +
> +       old = vq->last_used_idx;
> +       vq->last_used_idx = new;
> +       /* If the driver never bothers to signal in a very long while,
> +        * used index might wrap around. If that happens, invalidate
> +        * signalled_used index we stored. TODO: make sure driver
> +        * signals at least once in 2^16 and remove this. */
> +       if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
> +               vq->signalled_used_valid = false;
> +       return 0;
> +}
> +
> +/* After we've used one of their buffers, we tell them about it.  We'll then
> + * want to notify the guest, using eventfd. */
> +int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> +                    u16 *nheads, unsigned count)
> +{
> +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> +       int r;
> +
> +       if (!in_order || !nheads)
> +               r = vhost_add_used_n_ooo(vq, heads, nheads, count);
> +       else
> +               r = vhost_add_used_n_in_order(vq, heads, nheads, count);
> +

I just realized the original code didn't do it either, but we should
return if r < 0 here. Otherwise, used->ring[] has a random value and
used->idx is incremented covering these values. This should be
triggable in a guest that set used->idx valid but used->ring[]
invalid, for example.
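Something along these lines, maybe (untested sketch):

	if (!in_order || !nheads)
		r = vhost_add_used_n_ooo(vq, heads, nheads, count);
	else
		r = vhost_add_used_n_in_order(vq, heads, nheads, count);

	if (unlikely(r < 0))
		return r; /* don't advance used->idx past unwritten entries */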

>         /* Make sure buffer is written before we update index. */
>         smp_wmb();
>         if (vhost_put_used_idx(vq)) {
> @@ -2853,9 +2932,11 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
>  /* multi-buffer version of vhost_add_used_and_signal */
>  void vhost_add_used_and_signal_n(struct vhost_dev *dev,
>                                  struct vhost_virtqueue *vq,
> -                                struct vring_used_elem *heads, unsigned count)
> +                                struct vring_used_elem *heads,
> +                                u16 *nheads,
> +                                unsigned count)
>  {
> -       vhost_add_used_n(vq, heads, count);
> +       vhost_add_used_n(vq, heads, nheads, count);
>         vhost_signal(dev, vq);
>  }
>  EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index bb75a292d50c..dca9f309d396 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -103,6 +103,8 @@ struct vhost_virtqueue {
>          * Values are limited to 0x7fff, and the high bit is used as
>          * a wrap counter when using VIRTIO_F_RING_PACKED. */
>         u16 last_avail_idx;
> +       /* Next avail ring head when VIRTIO_F_IN_ORDER is neogitated */

s/neogitated/negotiated/

> +       u16 next_avail_head;
>
>         /* Caches available index value from user. */
>         u16 avail_idx;
> @@ -129,6 +131,7 @@ struct vhost_virtqueue {
>         struct iovec iotlb_iov[64];
>         struct iovec *indirect;
>         struct vring_used_elem *heads;
> +       u16 *nheads;
>         /* Protected by virtqueue mutex. */
>         struct vhost_iotlb *umem;
>         struct vhost_iotlb *iotlb;
> @@ -213,11 +216,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
>  int vhost_vq_init_access(struct vhost_virtqueue *);
>  int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
>  int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
> -                    unsigned count);
> +                    u16 *nheads, unsigned count);
>  void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
>                                unsigned int id, int len);
>  void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
> -                              struct vring_used_elem *heads, unsigned count);
> +                                struct vring_used_elem *heads, u16 *nheads,
> +                                unsigned count);
>  void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
>  void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
>  bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
> --
> 2.31.1
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 2/2] vhost_net: basic in_order support
  2025-07-08  6:48 ` [PATCH net-next 2/2] vhost_net: basic in_order support Jason Wang
  2025-07-09 16:38   ` Jonah Palmer
@ 2025-07-10 11:56   ` Eugenio Perez Martin
  1 sibling, 0 replies; 8+ messages in thread
From: Eugenio Perez Martin @ 2025-07-10 11:56 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, kvm, virtualization, netdev, linux-kernel, jonah.palmer

On Tue, Jul 8, 2025 at 8:48 AM Jason Wang <jasowang@redhat.com> wrote:
>
> This patch introduces basic in-order support for vhost-net. By
> recording the number of batched buffers in an array when calling
> `vhost_add_used_and_signal_n()`, we can reduce the number of userspace
> accesses. Note that the vhost-net batching logic is kept as we still
> count the number of buffers there.
>
> Testing Results:
>
> With testpmd:
>
> - TX: txonly mode + vhost_net with XDP_DROP on TAP shows a 17.5%
>   improvement, from 4.75 Mpps to 5.35 Mpps.
> - RX: No obvious improvements were observed.
>
> With virtio-ring in-order experimental code in the guest:
>
> - TX: pktgen in the guest + XDP_DROP on  TAP shows a 19% improvement,
>   from 5.2 Mpps to 6.2 Mpps.
> - RX: pktgen on TAP with vhost_net + XDP_DROP in the guest achieves a
>   6.1% improvement, from 3.47 Mpps to 3.61 Mpps.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Acked-by: Eugenio Pérez <eperezma@redhat.com>

Thanks!

> ---
>  drivers/vhost/net.c | 86 ++++++++++++++++++++++++++++++++-------------
>  1 file changed, 61 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4f9c67f17b49..8ac994b3228a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -74,7 +74,8 @@ enum {
>                          (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
>                          (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
>                          (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> -                        (1ULL << VIRTIO_F_RING_RESET)
> +                        (1ULL << VIRTIO_F_RING_RESET) |
> +                        (1ULL << VIRTIO_F_IN_ORDER)
>  };
>
>  enum {
> @@ -450,7 +451,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
>         return vhost_poll_start(poll, sock->file);
>  }
>
> -static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
> +static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
> +                                 unsigned int count)
>  {
>         struct vhost_virtqueue *vq = &nvq->vq;
>         struct vhost_dev *dev = vq->dev;
> @@ -458,8 +460,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
>         if (!nvq->done_idx)
>                 return;
>
> -       vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> -                                   nvq->done_idx);
> +       vhost_add_used_and_signal_n(dev, vq, vq->heads,
> +                                   vq->nheads, count);
>         nvq->done_idx = 0;
>  }
>
> @@ -468,6 +470,8 @@ static void vhost_tx_batch(struct vhost_net *net,
>                            struct socket *sock,
>                            struct msghdr *msghdr)
>  {
> +       struct vhost_virtqueue *vq = &nvq->vq;
> +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>         struct tun_msg_ctl ctl = {
>                 .type = TUN_MSG_PTR,
>                 .num = nvq->batched_xdp,
> @@ -475,6 +479,11 @@ static void vhost_tx_batch(struct vhost_net *net,
>         };
>         int i, err;
>
> +       if (in_order) {
> +               vq->heads[0].len = 0;
> +               vq->nheads[0] = nvq->done_idx;
> +       }
> +
>         if (nvq->batched_xdp == 0)
>                 goto signal_used;
>
> @@ -496,7 +505,7 @@ static void vhost_tx_batch(struct vhost_net *net,
>         }
>
>  signal_used:
> -       vhost_net_signal_used(nvq);
> +       vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
>         nvq->batched_xdp = 0;
>  }
>
> @@ -758,6 +767,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>         int sent_pkts = 0;
>         bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
>         bool busyloop_intr;
> +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>
>         do {
>                 busyloop_intr = false;
> @@ -794,11 +804,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>                                 break;
>                         }
>
> -                       /* We can't build XDP buff, go for single
> -                        * packet path but let's flush batched
> -                        * packets.
> -                        */
> -                       vhost_tx_batch(net, nvq, sock, &msg);
> +                       if (nvq->batched_xdp) {
> +                               /* We can't build XDP buff, go for single
> +                                * packet path but let's flush batched
> +                                * packets.
> +                                */
> +                               vhost_tx_batch(net, nvq, sock, &msg);
> +                       }
>                         msg.msg_control = NULL;
>                 } else {
>                         if (tx_can_batch(vq, total_len))
> @@ -819,8 +831,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>                         pr_debug("Truncated TX packet: len %d != %zd\n",
>                                  err, len);
>  done:
> -               vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> -               vq->heads[nvq->done_idx].len = 0;
> +               if (in_order) {
> +                       vq->heads[0].id = cpu_to_vhost32(vq, head);
> +               } else {
> +                       vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> +                       vq->heads[nvq->done_idx].len = 0;
> +               }
>                 ++nvq->done_idx;
>         } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
>
> @@ -999,7 +1015,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
>  }
>
>  static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
> -                                     bool *busyloop_intr)
> +                                     bool *busyloop_intr, unsigned int count)
>  {
>         struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
>         struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
> @@ -1009,7 +1025,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>
>         if (!len && rvq->busyloop_timeout) {
>                 /* Flush batched heads first */
> -               vhost_net_signal_used(rnvq);
> +               vhost_net_signal_used(rnvq, count);
>                 /* Both tx vq and rx socket were polled here */
>                 vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
>
> @@ -1021,7 +1037,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>
>  /* This is a multi-buffer version of vhost_get_desc, that works if
>   *     vq has read descriptors only.
> - * @vq         - the relevant virtqueue
> + * @nvq                - the relevant vhost_net virtqueue
>   * @datalen    - data length we'll be reading
>   * @iovcount   - returned count of io vectors we fill
>   * @log                - vhost log
> @@ -1029,14 +1045,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>   * @quota       - headcount quota, 1 for big buffer
>   *     returns number of buffer heads allocated, negative on error
>   */
> -static int get_rx_bufs(struct vhost_virtqueue *vq,
> +static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
>                        struct vring_used_elem *heads,
> +                      u16 *nheads,
>                        int datalen,
>                        unsigned *iovcount,
>                        struct vhost_log *log,
>                        unsigned *log_num,
>                        unsigned int quota)
>  {
> +       struct vhost_virtqueue *vq = &nvq->vq;
> +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>         unsigned int out, in;
>         int seg = 0;
>         int headcount = 0;
> @@ -1073,14 +1092,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>                         nlogs += *log_num;
>                         log += *log_num;
>                 }
> -               heads[headcount].id = cpu_to_vhost32(vq, d);
>                 len = iov_length(vq->iov + seg, in);
> -               heads[headcount].len = cpu_to_vhost32(vq, len);
> -               datalen -= len;
> +               if (!in_order) {
> +                       heads[headcount].id = cpu_to_vhost32(vq, d);
> +                       heads[headcount].len = cpu_to_vhost32(vq, len);
> +               }
>                 ++headcount;
> +               datalen -= len;
>                 seg += in;
>         }
> -       heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +
>         *iovcount = seg;
>         if (unlikely(log))
>                 *log_num = nlogs;
> @@ -1090,6 +1111,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>                 r = UIO_MAXIOV + 1;
>                 goto err;
>         }
> +
> +       if (!in_order)
> +               heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +       else {
> +               heads[0].len = cpu_to_vhost32(vq, len + datalen);
> +               heads[0].id = cpu_to_vhost32(vq, d);
> +               nheads[0] = headcount;
> +       }
> +
>         return headcount;
>  err:
>         vhost_discard_vq_desc(vq, headcount);
> @@ -1102,6 +1132,8 @@ static void handle_rx(struct vhost_net *net)
>  {
>         struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
>         struct vhost_virtqueue *vq = &nvq->vq;
> +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> +       unsigned int count = 0;
>         unsigned in, log;
>         struct vhost_log *vq_log;
>         struct msghdr msg = {
> @@ -1149,12 +1181,13 @@ static void handle_rx(struct vhost_net *net)
>
>         do {
>                 sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
> -                                                     &busyloop_intr);
> +                                                     &busyloop_intr, count);
>                 if (!sock_len)
>                         break;
>                 sock_len += sock_hlen;
>                 vhost_len = sock_len + vhost_hlen;
> -               headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
> +               headcount = get_rx_bufs(nvq, vq->heads + count,
> +                                       vq->nheads + count,
>                                         vhost_len, &in, vq_log, &log,
>                                         likely(mergeable) ? UIO_MAXIOV : 1);
>                 /* On error, stop handling until the next kick. */
> @@ -1230,8 +1263,11 @@ static void handle_rx(struct vhost_net *net)
>                         goto out;
>                 }
>                 nvq->done_idx += headcount;
> -               if (nvq->done_idx > VHOST_NET_BATCH)
> -                       vhost_net_signal_used(nvq);
> +               count += in_order ? 1 : headcount;
> +               if (nvq->done_idx > VHOST_NET_BATCH) {
> +                       vhost_net_signal_used(nvq, count);
> +                       count = 0;
> +               }
>                 if (unlikely(vq_log))
>                         vhost_log_write(vq, vq_log, log, vhost_len,
>                                         vq->iov, in);
> @@ -1243,7 +1279,7 @@ static void handle_rx(struct vhost_net *net)
>         else if (!sock_len)
>                 vhost_net_enable_vq(net, vq);
>  out:
> -       vhost_net_signal_used(nvq);
> +       vhost_net_signal_used(nvq, count);
>         mutex_unlock(&vq->mutex);
>  }
>
> --
> 2.31.1
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 1/2] vhost: basic in order support
  2025-07-10  9:04   ` Eugenio Perez Martin
@ 2025-07-11  1:44     ` Jason Wang
  0 siblings, 0 replies; 8+ messages in thread
From: Jason Wang @ 2025-07-11  1:44 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: mst, kvm, virtualization, netdev, linux-kernel, jonah.palmer

On Thu, Jul 10, 2025 at 5:05 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Tue, Jul 8, 2025 at 8:48 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > This patch adds basic in order support for vhost. Two optimizations
> > are implemented in this patch:
> >
> > 1) Since driver uses descriptor in order, vhost can deduce the next
> >    avail ring head by counting the number of descriptors that has been
> >    used in next_avail_head. This eliminate the need to access the
> >    available ring in vhost.
> >
> > 2) vhost_add_used_and_singal_n() is extended to accept the number of
> >    batched buffers per used elem. While this increases the times of
> >    usersapce memory access but it helps to reduce the chance of
>
> s/usersapce/userspace/
>
> >    used ring access of both the driver and vhost.
> >
> > Vhost-net will be the first user for this.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >  drivers/vhost/net.c   |   6 ++-
> >  drivers/vhost/vhost.c | 121 +++++++++++++++++++++++++++++++++++-------
> >  drivers/vhost/vhost.h |   8 ++-
> >  3 files changed, 111 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > index 7cbfc7d718b3..4f9c67f17b49 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -374,7 +374,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
> >         while (j) {
> >                 add = min(UIO_MAXIOV - nvq->done_idx, j);
> >                 vhost_add_used_and_signal_n(vq->dev, vq,
> > -                                           &vq->heads[nvq->done_idx], add);
> > +                                           &vq->heads[nvq->done_idx],
> > +                                           NULL, add);
> >                 nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
> >                 j -= add;
> >         }
> > @@ -457,7 +458,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
> >         if (!nvq->done_idx)
> >                 return;
> >
> > -       vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
> > +       vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> > +                                   nvq->done_idx);
> >         nvq->done_idx = 0;
> >  }
> >
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 3a5ebb973dba..c7ed069fc49e 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -364,6 +364,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> >         vq->avail = NULL;
> >         vq->used = NULL;
> >         vq->last_avail_idx = 0;
> > +       vq->next_avail_head = 0;
> >         vq->avail_idx = 0;
> >         vq->last_used_idx = 0;
> >         vq->signalled_used = 0;
> > @@ -455,6 +456,8 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> >         vq->log = NULL;
> >         kfree(vq->heads);
> >         vq->heads = NULL;
> > +       kfree(vq->nheads);
> > +       vq->nheads = NULL;
> >  }
> >
> >  /* Helper to allocate iovec buffers for all vqs. */
> > @@ -472,7 +475,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> >                                         GFP_KERNEL);
> >                 vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
> >                                           GFP_KERNEL);
> > -               if (!vq->indirect || !vq->log || !vq->heads)
> > +               vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads),
> > +                                          GFP_KERNEL);
> > +               if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads)
> >                         goto err_nomem;
> >         }
> >         return 0;
> > @@ -1990,14 +1995,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> >                         break;
> >                 }
> >                 if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
> > -                       vq->last_avail_idx = s.num & 0xffff;
> > +                       vq->next_avail_head = vq->last_avail_idx =
> > +                                             s.num & 0xffff;
> >                         vq->last_used_idx = (s.num >> 16) & 0xffff;
> >                 } else {
> >                         if (s.num > 0xffff) {
> >                                 r = -EINVAL;
> >                                 break;
> >                         }
> > -                       vq->last_avail_idx = s.num;
> > +                       vq->next_avail_head = vq->last_avail_idx = s.num;
>
> Why not just reuse last_avail_idx instead of creating next_avail_head?
>
> At first glance it seemed to me that it was done this way to support
> rewinding, but in_order path will happily reuse next_avail_head
> without checking for last_avail_idx except for checking if the ring is
> empty. Am I missing something?

Because the driver can submit a batch of available buffers,
last_avail_idx is not necessarily equal to next_avail_head.
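
To make that concrete with made-up numbers (an illustration, not code
from the patch): suppose the guest makes one buffer available as a
chain of three descriptors. vhost_get_vq_desc() then effectively does

	c = 3;				/* descriptors walked in the chain */
	vq->last_avail_idx += 1;	/* one avail ring entry consumed   */
	vq->next_avail_head += c;	/* head of the next in-order buffer */

so the two counters no longer match once a buffer spans more than one
descriptor.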

>
> >                 }
> >                 /* Forget the cached index value. */
> >                 vq->avail_idx = vq->last_avail_idx;
> > @@ -2590,11 +2596,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
> >                       unsigned int *out_num, unsigned int *in_num,
> >                       struct vhost_log *log, unsigned int *log_num)
> >  {
> > +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> >         struct vring_desc desc;
> >         unsigned int i, head, found = 0;
> >         u16 last_avail_idx = vq->last_avail_idx;
> >         __virtio16 ring_head;
> > -       int ret, access;
> > +       int ret, access, c = 0;
> >
> >         if (vq->avail_idx == vq->last_avail_idx) {
> >                 ret = vhost_get_avail_idx(vq);
> > @@ -2605,17 +2612,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
> >                         return vq->num;
> >         }
> >
> > -       /* Grab the next descriptor number they're advertising, and increment
> > -        * the index we've seen. */
> > -       if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
> > -               vq_err(vq, "Failed to read head: idx %d address %p\n",
> > -                      last_avail_idx,
> > -                      &vq->avail->ring[last_avail_idx % vq->num]);
> > -               return -EFAULT;
> > +       if (in_order)
> > +               head = vq->next_avail_head & (vq->num - 1);
> > +       else {
> > +               /* Grab the next descriptor number they're
> > +                * advertising, and increment the index we've seen. */
> > +               if (unlikely(vhost_get_avail_head(vq, &ring_head,
> > +                                                 last_avail_idx))) {
> > +                       vq_err(vq, "Failed to read head: idx %d address %p\n",
> > +                               last_avail_idx,
> > +                               &vq->avail->ring[last_avail_idx % vq->num]);
> > +                       return -EFAULT;
> > +               }
> > +               head = vhost16_to_cpu(vq, ring_head);
> >         }
> >
> > -       head = vhost16_to_cpu(vq, ring_head);
> > -
> >         /* If their number is silly, that's an error. */
> >         if (unlikely(head >= vq->num)) {
> >                 vq_err(vq, "Guest says index %u > %u is available",
> > @@ -2658,6 +2669,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
> >                                                 "in indirect descriptor at idx %d\n", i);
> >                                 return ret;
> >                         }
> > +                       ++c;
> >                         continue;
> >                 }
> >
> > @@ -2693,10 +2705,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
> >                         }
> >                         *out_num += ret;
> >                 }
> > +               ++c;
> >         } while ((i = next_desc(vq, &desc)) != -1);
> >
> >         /* On success, increment avail index. */
> >         vq->last_avail_idx++;
> > +       vq->next_avail_head += c;
> >
> >         /* Assume notifications from guest are disabled at this point,
> >          * if they aren't we would need to update avail_event index. */
> > @@ -2720,8 +2734,9 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
> >                 cpu_to_vhost32(vq, head),
> >                 cpu_to_vhost32(vq, len)
> >         };
> > +       u16 nheads = 1;
> >
> > -       return vhost_add_used_n(vq, &heads, 1);
> > +       return vhost_add_used_n(vq, &heads, &nheads, 1);
> >  }
> >  EXPORT_SYMBOL_GPL(vhost_add_used);
> >
> > @@ -2757,10 +2772,10 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
> >         return 0;
> >  }
> >
> > -/* After we've used one of their buffers, we tell them about it.  We'll then
> > - * want to notify the guest, using eventfd. */
> > -int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> > -                    unsigned count)
> > +static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
> > +                               struct vring_used_elem *heads,
> > +                               u16 *nheads,
> > +                               unsigned count)
>
> nheads is not used in this function and it is checked to be NULL in
> the caller, should we remove it from the parameter list?

Exactly.
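
I.e. something along these lines (sketch):

	static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq,
					struct vring_used_elem *heads,
					unsigned count)

and the !in_order/!nheads branch in vhost_add_used_n() would just call
vhost_add_used_n_ooo(vq, heads, count).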

>
> >  {
> >         int start, n, r;
> >
> > @@ -2775,6 +2790,70 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> >         }
> >         r = __vhost_add_used_n(vq, heads, count);
> >
> > +       return r;
>
> Nit: We can merge with the previous statement and do "return
> __vhost_add_used_n(vq, heads, count);"

Right.

>
> > +}
> > +
> > +static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
> > +                                    struct vring_used_elem *heads,
> > +                                    u16 *nheads,
>
> Nit: we can const-ify nheads, and do the same for _in_order variant
> and vhost_add_used_n. Actually we can do it with heads too but it
> requires more changes to existing code. I think it would be nice to
> constify *nheads if you need to respin.

Let me do that.
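
E.g. (a sketch of the prototypes only):

	static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq,
					     struct vring_used_elem *heads,
					     const u16 *nheads,
					     unsigned count);

	int vhost_add_used_n(struct vhost_virtqueue *vq,
			     struct vring_used_elem *heads,
			     const u16 *nheads, unsigned count);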

>
> > +                                    unsigned count)
> > +{
> > +       vring_used_elem_t __user *used;
> > +       u16 old, new = vq->last_used_idx;
> > +       int start, i;
> > +
> > +       if (!nheads)
> > +               return -EINVAL;
> > +
> > +       start = vq->last_used_idx & (vq->num - 1);
> > +       used = vq->used->ring + start;
> > +
> > +       for (i = 0; i < count; i++) {
> > +               if (vhost_put_used(vq, &heads[i], start, 1)) {
> > +                       vq_err(vq, "Failed to write used");
> > +                       return -EFAULT;
> > +               }
> > +               start += nheads[i];
> > +               new += nheads[i];
> > +               if (start >= vq->num)
> > +                       start -= vq->num;
> > +       }
> > +
> > +       if (unlikely(vq->log_used)) {
> > +               /* Make sure data is seen before log. */
> > +               smp_wmb();
> > +               /* Log used ring entry write. */
> > +               log_used(vq, ((void __user *)used - (void __user *)vq->used),
> > +                        (vq->num - start) * sizeof *used);
> > +               if (start + count > vq->num)
> > +                       log_used(vq, 0,
> > +                                (start + count - vq->num) * sizeof *used);
> > +       }
> > +
> > +       old = vq->last_used_idx;
> > +       vq->last_used_idx = new;
> > +       /* If the driver never bothers to signal in a very long while,
> > +        * used index might wrap around. If that happens, invalidate
> > +        * signalled_used index we stored. TODO: make sure driver
> > +        * signals at least once in 2^16 and remove this. */
> > +       if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
> > +               vq->signalled_used_valid = false;
> > +       return 0;
> > +}
> > +
> > +/* After we've used one of their buffers, we tell them about it.  We'll then
> > + * want to notify the guest, using eventfd. */
> > +int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
> > +                    u16 *nheads, unsigned count)
> > +{
> > +       bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> > +       int r;
> > +
> > +       if (!in_order || !nheads)
> > +               r = vhost_add_used_n_ooo(vq, heads, nheads, count);
> > +       else
> > +               r = vhost_add_used_n_in_order(vq, heads, nheads, count);
> > +
>
> I just realized the original code didn't do it either, but we should
> return here if r < 0. Otherwise used->ring[] is left holding random
> values while used->idx is still incremented to cover them. This
> should be triggerable, for example, by a guest that sets up a valid
> used->idx but an invalid used->ring[].

This looks like a bug; I will send an independent fix.

>
> >         /* Make sure buffer is written before we update index. */
> >         smp_wmb();
> >         if (vhost_put_used_idx(vq)) {
> > @@ -2853,9 +2932,11 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
> >  /* multi-buffer version of vhost_add_used_and_signal */
> >  void vhost_add_used_and_signal_n(struct vhost_dev *dev,
> >                                  struct vhost_virtqueue *vq,
> > -                                struct vring_used_elem *heads, unsigned count)
> > +                                struct vring_used_elem *heads,
> > +                                u16 *nheads,
> > +                                unsigned count)
> >  {
> > -       vhost_add_used_n(vq, heads, count);
> > +       vhost_add_used_n(vq, heads, nheads, count);
> >         vhost_signal(dev, vq);
> >  }
> >  EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index bb75a292d50c..dca9f309d396 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -103,6 +103,8 @@ struct vhost_virtqueue {
> >          * Values are limited to 0x7fff, and the high bit is used as
> >          * a wrap counter when using VIRTIO_F_RING_PACKED. */
> >         u16 last_avail_idx;
> > +       /* Next avail ring head when VIRTIO_F_IN_ORDER is neogitated */
>
> s/neogitated/negotiated/

Will fix it.

>
> > +       u16 next_avail_head;
> >
> >         /* Caches available index value from user. */
> >         u16 avail_idx;
> > @@ -129,6 +131,7 @@ struct vhost_virtqueue {
> >         struct iovec iotlb_iov[64];
> >         struct iovec *indirect;
> >         struct vring_used_elem *heads;
> > +       u16 *nheads;
> >         /* Protected by virtqueue mutex. */
> >         struct vhost_iotlb *umem;
> >         struct vhost_iotlb *iotlb;
> > @@ -213,11 +216,12 @@ bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
> >  int vhost_vq_init_access(struct vhost_virtqueue *);
> >  int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
> >  int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
> > -                    unsigned count);
> > +                    u16 *nheads, unsigned count);
> >  void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
> >                                unsigned int id, int len);
> >  void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
> > -                              struct vring_used_elem *heads, unsigned count);
> > +                                struct vring_used_elem *heads, u16 *nheads,
> > +                                unsigned count);
> >  void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
> >  void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
> >  bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
> > --
> > 2.31.1
> >
>

Thanks


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-07-11  1:44 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-08  6:48 [PATCH net-next 0/2] in order support for vhost-net Jason Wang
2025-07-08  6:48 ` [PATCH net-next 1/2] vhost: basic in order support Jason Wang
2025-07-09 16:38   ` Jonah Palmer
2025-07-10  9:04   ` Eugenio Perez Martin
2025-07-11  1:44     ` Jason Wang
2025-07-08  6:48 ` [PATCH net-next 2/2] vhost_net: basic in_order support Jason Wang
2025-07-09 16:38   ` Jonah Palmer
2025-07-10 11:56   ` Eugenio Perez Martin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).