[PATCH v2] hw/net/virtio-net: add support for notification coalescing

public inbox for qemu-devel@nongnu.org
 help / color / mirror / Atom feed

* [PATCH v2] hw/net/virtio-net: add support for notification coalescing
@ 2026-03-11 13:28 Koushik Dutta
  2026-03-12 10:17 ` Eugenio Perez Martin
  0 siblings, 1 reply; 4+ messages in thread
From: Koushik Dutta @ 2026-03-11 13:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Stefano Garzarella, Michael S. Tsirkin, Eugenio Pérez,
	Jason Wang

Implement VirtIO Network Notification Coalescing (Bit 53).
This allows the guest to manage interrupt frequency using ethtool
-C for both RX and TX paths.

- Added VIRTIO_NET_F_NOTF_COAL to host features.
- Implemented VIRTIO_NET_CTRL_NOTF_COAL class handling in
  virtio_net_handle_ctrl_iov.
- Added logic to store and apply rx/tx usecs and max_packets.
- Added packet counters and threshold logic for both RX and TX data paths.
- Dynamic Dispatcher: Implemented a dispatcher mechanism that
  dynamically switches/activates the notification callback logic
  only after the guest enables TX coalescing via ethtool.

This reduces interrupt overhead by batching notifications based on
either a packet count or a time-based threshold.

Signed-off-by: Koushik Dutta <kdutta@redhat.com>
---
 hw/net/virtio-net.c                | 114 ++++++++++++++++++++++++-----
 hw/virtio/vhost-shadow-virtqueue.c |   1 +
 hw/virtio/vhost-vdpa.c             |   3 +
 include/hw/virtio/virtio-net.h     |   8 ++
 net/vhost-vdpa.c                   |  10 +++
 5 files changed, 119 insertions(+), 17 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index eccb48ad42..96aa70aea8 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -157,6 +157,15 @@ static void flush_or_purge_queued_packets(NetClientState *nc)
  * - we could suppress RX interrupt if we were so inclined.
  */
 
+static void virtio_net_rx_notify(void *opaque)
+{
+    VirtIONet *n = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+
+    n->rx_pkt_cnt = 0;
+    virtio_notify(vdev, n->vqs[0].rx_vq);
+}
+
 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 {
     VirtIONet *n = VIRTIO_NET(vdev);
@@ -1081,6 +1090,29 @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
     }
 }
 
+static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
+                                  struct iovec *iov, unsigned int iov_cnt)
+{
+    struct virtio_net_ctrl_coal coal;
+    size_t s;
+
+    s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
+    if (s != sizeof(coal)) {
+        return VIRTIO_NET_ERR;
+    }
+
+    if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
+        n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
+        n->rx_coal_packets = le32_to_cpu(coal.max_packets);
+    } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
+        n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
+        n->tx_coal_packets = le32_to_cpu(coal.max_packets);
+        n->tx_timeout = n->tx_coal_usecs * 1000;
+    }
+
+    return VIRTIO_NET_OK;
+}
+
 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                  struct iovec *iov, unsigned int iov_cnt)
 {
@@ -1582,6 +1614,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
+        status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
     }
 
     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
@@ -2041,7 +2075,18 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
     }
 
     virtqueue_flush(q->rx_vq, i);
-    virtio_notify(vdev, q->rx_vq);
+
+    /* rx coalescing */
+    n->rx_pkt_cnt += i;
+    if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
+        timer_del(n->rx_index_timer);
+        virtio_net_rx_notify(n);
+    } else {
+        if (n->rx_pkt_cnt == i) {
+            timer_mod(n->rx_index_timer,
+                      qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + n->rx_coal_usecs);
+        }
+    }
 
     return size;
 
@@ -2900,6 +2945,7 @@ static void virtio_net_tx_timer(void *opaque)
     if (ret == -EBUSY || ret == -EINVAL) {
         return;
     }
+    n->tx_pkt_cnt -= ret;
     /*
      * If we flush a full burst of packets, assume there are
      * more coming and immediately rearm
@@ -2919,6 +2965,7 @@ static void virtio_net_tx_timer(void *opaque)
     ret = virtio_net_flush_tx(q);
     if (ret > 0) {
         virtio_queue_set_notification(q->tx_vq, 0);
+        n->tx_pkt_cnt -= ret;
         q->tx_waiting = 1;
         timer_mod(q->tx_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
@@ -2974,27 +3021,45 @@ static void virtio_net_tx_bh(void *opaque)
     }
 }
 
-static void virtio_net_add_queue(VirtIONet *n, int index)
+static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
 {
-    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+    VirtIONet *n = VIRTIO_NET(vdev);
 
-    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
-                                           virtio_net_handle_rx);
+    n->tx_pkt_cnt++;
+    bool use_timer = n->tx_timer_activate || n->tx_coal_usecs > 0 ||
+                     n->tx_coal_packets > 0;
 
-    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
-        n->vqs[index].tx_vq =
-            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
-                             virtio_net_handle_tx_timer);
-        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                              virtio_net_tx_timer,
-                                              &n->vqs[index]);
+    if (use_timer && n->tx_pkt_cnt < n->tx_coal_packets) {
+        virtio_net_handle_tx_timer(vdev, vq);
     } else {
-        n->vqs[index].tx_vq =
-            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
-                             virtio_net_handle_tx_bh);
-        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
-                                                  &DEVICE(vdev)->mem_reentrancy_guard);
+        n->tx_pkt_cnt = 0;
+        virtio_net_handle_tx_bh(vdev, vq);
     }
+}
+
+static void virtio_net_add_queue(VirtIONet *n, int index)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+
+    n->vqs[index].rx_vq =
+        virtio_add_queue(vdev,
+                         n->net_conf.rx_queue_size,
+                         virtio_net_handle_rx);
+
+    n->vqs[index].tx_vq =
+        virtio_add_queue(vdev,
+                         n->net_conf.tx_queue_size,
+                         virtio_net_handle_tx_dispatch);
+
+    n->vqs[index].tx_timer =
+        timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                     virtio_net_tx_timer,
+                     &n->vqs[index]);
+
+    n->vqs[index].tx_bh =
+        qemu_bh_new_guarded(virtio_net_tx_bh,
+                            &n->vqs[index],
+                            &DEVICE(vdev)->mem_reentrancy_guard);
 
     n->vqs[index].tx_waiting = 0;
     n->vqs[index].n = n;
@@ -3089,6 +3154,7 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
     virtio_features_or(features, features, n->host_features_ex);
 
     virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
+    virtio_add_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);
 
     if (!peer_has_vnet_hdr(n)) {
         virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
@@ -3972,6 +4038,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         error_printf("Defaulting to \"bh\"");
     }
 
+    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")) {
+        n->tx_timer_activate = true;
+    }
+
     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                     n->net_conf.tx_queue_size);
 
@@ -4048,6 +4118,14 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
             n->rss_data.specified_hash_types.on_bits |
             n->rss_data.specified_hash_types.auto_bits;
     }
+    n->rx_pkt_cnt = 0;
+    n->tx_pkt_cnt = 0;
+    n->rx_coal_usecs = 0;
+    n->tx_coal_usecs = 0;
+    n->rx_coal_packets = 0;
+    n->tx_coal_packets = 0;
+    n->rx_index_timer = timer_new_us(QEMU_CLOCK_VIRTUAL,
+                                     virtio_net_rx_notify, n);
 }
 
 static void virtio_net_device_unrealize(DeviceState *dev)
@@ -4262,6 +4340,8 @@ static const Property virtio_net_properties[] = {
                       VIRTIO_NET_F_GUEST_USO6, true),
     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                       VIRTIO_NET_F_HOST_USO, true),
+    DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
+                      VIRTIO_NET_F_NOTF_COAL, true),
     DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
                                   rss_data.specified_hash_types,
                                   VIRTIO_NET_HASH_REPORT_IPv4 - 1,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 6242aeb69c..104ed0ce8f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -37,6 +37,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
         case VIRTIO_RING_F_INDIRECT_DESC:
             continue;
 
+        case VIRTIO_F_RING_RESET:
         case VIRTIO_F_ACCESS_PLATFORM:
             /* SVQ trust in the host's IOMMU to translate addresses */
         case VIRTIO_F_VERSION_1:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 2f8f11df86..8eb9bdb961 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1541,6 +1541,9 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
     if (ret == 0) {
         /* Add SVQ logging capabilities */
         *features |= BIT_ULL(VHOST_F_LOG_ALL);
+
+        /* Add notification coalescing features */
+        *features |= BIT_ULL(VIRTIO_NET_F_NOTF_COAL);
     }
 
     return ret;
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 5b8ab7bda7..024501ed37 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -231,6 +231,14 @@ struct VirtIONet {
     struct EBPFRSSContext ebpf_rss;
     uint32_t nr_ebpf_rss_fds;
     char **ebpf_rss_fds;
+    QEMUTimer *rx_index_timer;
+    uint32_t rx_coal_usecs;
+    uint32_t rx_coal_packets;
+    uint32_t rx_pkt_cnt;
+    uint32_t tx_coal_usecs;
+    uint32_t tx_coal_packets;
+    uint32_t tx_pkt_cnt;
+    bool tx_timer_activate;
 };
 
 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 3df6091274..6a5d0f4cae 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -61,6 +61,7 @@ static const int vdpa_feature_bits[] = {
     VIRTIO_F_NOTIFY_ON_EMPTY,
     VIRTIO_F_RING_PACKED,
     VIRTIO_F_RING_RESET,
+    VIRTIO_F_ORDER_PLATFORM,
     VIRTIO_F_VERSION_1,
     VIRTIO_F_IN_ORDER,
     VIRTIO_F_NOTIFICATION_DATA,
@@ -70,6 +71,8 @@ static const int vdpa_feature_bits[] = {
     VIRTIO_NET_F_CTRL_RX,
     VIRTIO_NET_F_CTRL_RX_EXTRA,
     VIRTIO_NET_F_CTRL_VLAN,
+    VIRTIO_NET_F_GUEST_ANNOUNCE,
+    VIRTIO_NET_F_NOTF_COAL,
     VIRTIO_NET_F_CTRL_VQ,
     VIRTIO_NET_F_GSO,
     VIRTIO_NET_F_GUEST_CSUM,
@@ -115,6 +118,13 @@ static const uint64_t vdpa_svq_device_features =
     BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
     BIT_ULL(VIRTIO_NET_F_STATUS) |
+    BIT_ULL(VIRTIO_F_RING_RESET) |
+    BIT_ULL(VIRTIO_F_ORDER_PLATFORM) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_USO4) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_USO6) |
+    BIT_ULL(VIRTIO_NET_F_HOST_USO) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |
+    BIT_ULL(VIRTIO_NET_F_NOTF_COAL) |
     BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
     BIT_ULL(VIRTIO_NET_F_GSO) |
     BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
-- 
2.53.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] hw/net/virtio-net: add support for notification coalescing
  2026-03-11 13:28 [PATCH v2] hw/net/virtio-net: add support for notification coalescing Koushik Dutta
@ 2026-03-12 10:17 ` Eugenio Perez Martin
  2026-03-23  8:38   ` Koushik Dutta
  0 siblings, 1 reply; 4+ messages in thread
From: Eugenio Perez Martin @ 2026-03-12 10:17 UTC (permalink / raw)
  To: Koushik Dutta
  Cc: qemu-devel, Stefano Garzarella, Michael S. Tsirkin, Jason Wang

On Wed, Mar 11, 2026 at 2:28 PM Koushik Dutta <kdutta@redhat.com> wrote:
>
> Implement VirtIO Network Notification Coalescing (Bit 53).
> This allows the guest to manage interrupt frequency using ethtool
> -C for both RX and TX paths.
>
> - Added VIRTIO_NET_F_NOTF_COAL to host features.
> - Implemented VIRTIO_NET_CTRL_NOTF_COAL class handling in
>   virtio_net_handle_ctrl_iov.
> - Added logic to store and apply rx/tx usecs and max_packets.
> - Added packet counters and threshold logic for both RX and TX data paths.
> - Dynamic Dispatcher: Implemented a dispatcher mechanism that
>   dynamically switches/activates the notification callback logic
>   only after the guest enables TX coalescing via ethtool.
>
> This reduces interrupt overhead by batching notifications based on
> either a packet count or a time-based threshold.
>
> Signed-off-by: Koushik Dutta <kdutta@redhat.com>
> ---
>  hw/net/virtio-net.c                | 114 ++++++++++++++++++++++++-----
>  hw/virtio/vhost-shadow-virtqueue.c |   1 +
>  hw/virtio/vhost-vdpa.c             |   3 +
>  include/hw/virtio/virtio-net.h     |   8 ++
>  net/vhost-vdpa.c                   |  10 +++
>  5 files changed, 119 insertions(+), 17 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index eccb48ad42..96aa70aea8 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -157,6 +157,15 @@ static void flush_or_purge_queued_packets(NetClientState *nc)
>   * - we could suppress RX interrupt if we were so inclined.
>   */
>
> +static void virtio_net_rx_notify(void *opaque)
> +{
> +    VirtIONet *n = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +
> +    n->rx_pkt_cnt = 0;
> +    virtio_notify(vdev, n->vqs[0].rx_vq);
> +}
> +
>  static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
>  {
>      VirtIONet *n = VIRTIO_NET(vdev);
> @@ -1081,6 +1090,29 @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
>      }
>  }
>
> +static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
> +                                  struct iovec *iov, unsigned int iov_cnt)
> +{
> +    struct virtio_net_ctrl_coal coal;
> +    size_t s;
> +
> +    s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
> +    if (s != sizeof(coal)) {
> +        return VIRTIO_NET_ERR;
> +    }
> +
> +    if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
> +        n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
> +        n->rx_coal_packets = le32_to_cpu(coal.max_packets);
> +    } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
> +        n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
> +        n->tx_coal_packets = le32_to_cpu(coal.max_packets);
> +        n->tx_timeout = n->tx_coal_usecs * 1000;
> +    }
> +
> +    return VIRTIO_NET_OK;
> +}
> +
>  static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
>                                   struct iovec *iov, unsigned int iov_cnt)
>  {
> @@ -1582,6 +1614,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
>          status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
>      } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
>          status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
> +    } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
> +        status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
>      }
>
>      s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
> @@ -2041,7 +2075,18 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
>      }
>
>      virtqueue_flush(q->rx_vq, i);
> -    virtio_notify(vdev, q->rx_vq);
> +
> +    /* rx coalescing */
> +    n->rx_pkt_cnt += i;
> +    if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
> +        timer_del(n->rx_index_timer);
> +        virtio_net_rx_notify(n);
> +    } else {
> +        if (n->rx_pkt_cnt == i) {

Maybe it's better to check if !timer_pending? It wasn't immediately
clear to me that `n->rx_pkt_cnt == i` until I realized that
`n->rx_pkt_cnt` is 0 at the beginning of the function for that
condition to be true.

> +            timer_mod(n->rx_index_timer,
> +                      qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + n->rx_coal_usecs);
> +        }
> +    }
>
>      return size;
>
> @@ -2900,6 +2945,7 @@ static void virtio_net_tx_timer(void *opaque)
>      if (ret == -EBUSY || ret == -EINVAL) {
>          return;
>      }
> +    n->tx_pkt_cnt -= ret;
>      /*
>       * If we flush a full burst of packets, assume there are
>       * more coming and immediately rearm
> @@ -2919,6 +2965,7 @@ static void virtio_net_tx_timer(void *opaque)
>      ret = virtio_net_flush_tx(q);
>      if (ret > 0) {
>          virtio_queue_set_notification(q->tx_vq, 0);
> +        n->tx_pkt_cnt -= ret;

If we got his way, we must control for underflow here.

But I think it should be n->pt_pkt_cnt = 0, isn't it?

>          q->tx_waiting = 1;
>          timer_mod(q->tx_timer,
>                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> @@ -2974,27 +3021,45 @@ static void virtio_net_tx_bh(void *opaque)
>      }
>  }
>
> -static void virtio_net_add_queue(VirtIONet *n, int index)
> +static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
>  {
> -    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +    VirtIONet *n = VIRTIO_NET(vdev);
>
> -    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
> -                                           virtio_net_handle_rx);
> +    n->tx_pkt_cnt++;
> +    bool use_timer = n->tx_timer_activate || n->tx_coal_usecs > 0 ||
> +                     n->tx_coal_packets > 0;

No declarations in the middle of the function.

Also, why use_timer = true as long as n->tx_coal_packets > 0?

>
> -    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
> -        n->vqs[index].tx_vq =
> -            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> -                             virtio_net_handle_tx_timer);
> -        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> -                                              virtio_net_tx_timer,
> -                                              &n->vqs[index]);
> +    if (use_timer && n->tx_pkt_cnt < n->tx_coal_packets) {
> +        virtio_net_handle_tx_timer(vdev, vq);
>      } else {
> -        n->vqs[index].tx_vq =
> -            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> -                             virtio_net_handle_tx_bh);
> -        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
> -                                                  &DEVICE(vdev)->mem_reentrancy_guard);
> +        n->tx_pkt_cnt = 0;
> +        virtio_net_handle_tx_bh(vdev, vq);
>      }
> +}
> +
> +static void virtio_net_add_queue(VirtIONet *n, int index)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +
> +    n->vqs[index].rx_vq =
> +        virtio_add_queue(vdev,
> +                         n->net_conf.rx_queue_size,
> +                         virtio_net_handle_rx);
> +
> +    n->vqs[index].tx_vq =
> +        virtio_add_queue(vdev,
> +                         n->net_conf.tx_queue_size,
> +                         virtio_net_handle_tx_dispatch);
> +
> +    n->vqs[index].tx_timer =
> +        timer_new_ns(QEMU_CLOCK_VIRTUAL,
> +                     virtio_net_tx_timer,
> +                     &n->vqs[index]);

We should avoid creating a new timer if it will not be used by
default. Create it only if cmdline enables it or the guest enable it
via control vq.

> +
> +    n->vqs[index].tx_bh =
> +        qemu_bh_new_guarded(virtio_net_tx_bh,
> +                            &n->vqs[index],
> +                            &DEVICE(vdev)->mem_reentrancy_guard);
>
>      n->vqs[index].tx_waiting = 0;
>      n->vqs[index].n = n;
> @@ -3089,6 +3154,7 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
>      virtio_features_or(features, features, n->host_features_ex);
>
>      virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
> +    virtio_add_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);

We shouldn't enable VIRTIO_NET_F_NOTF_COAL unconditionally, it should
be regulated by the cmdline property.

>
>      if (!peer_has_vnet_hdr(n)) {
>          virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
> @@ -3972,6 +4038,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>          error_printf("Defaulting to \"bh\"");
>      }
>
> +    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")) {
> +        n->tx_timer_activate = true;
> +    }
> +
>      n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
>                                      n->net_conf.tx_queue_size);
>
> @@ -4048,6 +4118,14 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>              n->rss_data.specified_hash_types.on_bits |
>              n->rss_data.specified_hash_types.auto_bits;
>      }
> +    n->rx_pkt_cnt = 0;
> +    n->tx_pkt_cnt = 0;
> +    n->rx_coal_usecs = 0;
> +    n->tx_coal_usecs = 0;
> +    n->rx_coal_packets = 0;
> +    n->tx_coal_packets = 0;
> +    n->rx_index_timer = timer_new_us(QEMU_CLOCK_VIRTUAL,
> +                                     virtio_net_rx_notify, n);

Same as the tx timer. Is it worth creating a new timer even if it will
not be used by default?

>  }
>
>  static void virtio_net_device_unrealize(DeviceState *dev)
> @@ -4262,6 +4340,8 @@ static const Property virtio_net_properties[] = {
>                        VIRTIO_NET_F_GUEST_USO6, true),
>      DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
>                        VIRTIO_NET_F_HOST_USO, true),
> +    DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
> +                      VIRTIO_NET_F_NOTF_COAL, true),
>      DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
>                                    rss_data.specified_hash_types,
>                                    VIRTIO_NET_HASH_REPORT_IPv4 - 1,
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 6242aeb69c..104ed0ce8f 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -37,6 +37,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
>          case VIRTIO_RING_F_INDIRECT_DESC:
>              continue;
>
> +        case VIRTIO_F_RING_RESET:

This patch does not handle VIRTIO_F_RING_RESET, so this should not be enabled.

>          case VIRTIO_F_ACCESS_PLATFORM:
>              /* SVQ trust in the host's IOMMU to translate addresses */
>          case VIRTIO_F_VERSION_1:
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 2f8f11df86..8eb9bdb961 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -1541,6 +1541,9 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
>      if (ret == 0) {
>          /* Add SVQ logging capabilities */
>          *features |= BIT_ULL(VHOST_F_LOG_ALL);
> +
> +        /* Add notification coalescing features */
> +        *features |= BIT_ULL(VIRTIO_NET_F_NOTF_COAL);
>      }
>
>      return ret;
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 5b8ab7bda7..024501ed37 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -231,6 +231,14 @@ struct VirtIONet {
>      struct EBPFRSSContext ebpf_rss;
>      uint32_t nr_ebpf_rss_fds;
>      char **ebpf_rss_fds;
> +    QEMUTimer *rx_index_timer;
> +    uint32_t rx_coal_usecs;
> +    uint32_t rx_coal_packets;
> +    uint32_t rx_pkt_cnt;
> +    uint32_t tx_coal_usecs;
> +    uint32_t tx_coal_packets;
> +    uint32_t tx_pkt_cnt;
> +    bool tx_timer_activate;
>  };
>
>  size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 3df6091274..6a5d0f4cae 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -61,6 +61,7 @@ static const int vdpa_feature_bits[] = {
>      VIRTIO_F_NOTIFY_ON_EMPTY,
>      VIRTIO_F_RING_PACKED,
>      VIRTIO_F_RING_RESET,
> +    VIRTIO_F_ORDER_PLATFORM,
>      VIRTIO_F_VERSION_1,
>      VIRTIO_F_IN_ORDER,
>      VIRTIO_F_NOTIFICATION_DATA,
> @@ -70,6 +71,8 @@ static const int vdpa_feature_bits[] = {
>      VIRTIO_NET_F_CTRL_RX,
>      VIRTIO_NET_F_CTRL_RX_EXTRA,
>      VIRTIO_NET_F_CTRL_VLAN,
> +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> +    VIRTIO_NET_F_NOTF_COAL,

Only this feature can be added to vdpa_feature_bits. The rest of them
must not be added to any other array.

>      VIRTIO_NET_F_CTRL_VQ,
>      VIRTIO_NET_F_GSO,
>      VIRTIO_NET_F_GUEST_CSUM,
> @@ -115,6 +118,13 @@ static const uint64_t vdpa_svq_device_features =
>      BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
>      BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
>      BIT_ULL(VIRTIO_NET_F_STATUS) |
> +    BIT_ULL(VIRTIO_F_RING_RESET) |
> +    BIT_ULL(VIRTIO_F_ORDER_PLATFORM) |
> +    BIT_ULL(VIRTIO_NET_F_GUEST_USO4) |
> +    BIT_ULL(VIRTIO_NET_F_GUEST_USO6) |
> +    BIT_ULL(VIRTIO_NET_F_HOST_USO) |
> +    BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |
> +    BIT_ULL(VIRTIO_NET_F_NOTF_COAL) |
>      BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
>      BIT_ULL(VIRTIO_NET_F_GSO) |
>      BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
> --
> 2.53.0
>



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] hw/net/virtio-net: add support for notification coalescing
  2026-03-12 10:17 ` Eugenio Perez Martin
@ 2026-03-23  8:38   ` Koushik Dutta
  2026-03-23  9:46     ` Eugenio Perez Martin
  0 siblings, 1 reply; 4+ messages in thread
From: Koushik Dutta @ 2026-03-23  8:38 UTC (permalink / raw)
  To: Eugenio Perez Martin
  Cc: qemu-devel, Stefano Garzarella, Michael S. Tsirkin, Jason Wang

[-- Attachment #1: Type: text/plain, Size: 15557 bytes --]

Hi Eugenio,
Regarding the point about not enabling VIRTIO_NET_F_NOTF_COAL
unconditionally: since feature negotiation happens during VM bootup and
coalescing is configured via ethtool after the VM has already booted, how
would we handle this under a conditional property?

If the feature isn't negotiated at the start, we wouldn't be able to enable
it dynamically later. Could you clarify how you'd like the command-line
regulation to interact with the guest's ability to enable it later?
------------------------------------------------------------

> @@ -37,6 +37,7 @@ bool vhost_svq_valid_features(uint64_t features, Error
**errp)
>          case VIRTIO_RING_F_INDIRECT_DESC:
>              continue;
>
> +        case VIRTIO_F_RING_RESET:

This patch does not handle VIRTIO_F_RING_RESET, so this should not be
enabled.

=> I included VIRTIO_F_RING_RESET in the valid features list because the
vDPA host device offers it by default; without it, SVQ fails the feature
negotiation check during initialization.
qemu-system-x86_64: -netdev
type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-svq=on: SVQ
Invalid device feature flags, offer: 0x1e0010330bfffa7, ok:
0x1e0000330bfffa7

Regards,
Koushik Dutta

On Thu, Mar 12, 2026 at 3:47 PM Eugenio Perez Martin <eperezma@redhat.com>
wrote:

> On Wed, Mar 11, 2026 at 2:28 PM Koushik Dutta <kdutta@redhat.com> wrote:
> >
> > Implement VirtIO Network Notification Coalescing (Bit 53).
> > This allows the guest to manage interrupt frequency using ethtool
> > -C for both RX and TX paths.
> >
> > - Added VIRTIO_NET_F_NOTF_COAL to host features.
> > - Implemented VIRTIO_NET_CTRL_NOTF_COAL class handling in
> >   virtio_net_handle_ctrl_iov.
> > - Added logic to store and apply rx/tx usecs and max_packets.
> > - Added packet counters and threshold logic for both RX and TX data
> paths.
> > - Dynamic Dispatcher: Implemented a dispatcher mechanism that
> >   dynamically switches/activates the notification callback logic
> >   only after the guest enables TX coalescing via ethtool.
> >
> > This reduces interrupt overhead by batching notifications based on
> > either a packet count or a time-based threshold.
> >
> > Signed-off-by: Koushik Dutta <kdutta@redhat.com>
> > ---
> >  hw/net/virtio-net.c                | 114 ++++++++++++++++++++++++-----
> >  hw/virtio/vhost-shadow-virtqueue.c |   1 +
> >  hw/virtio/vhost-vdpa.c             |   3 +
> >  include/hw/virtio/virtio-net.h     |   8 ++
> >  net/vhost-vdpa.c                   |  10 +++
> >  5 files changed, 119 insertions(+), 17 deletions(-)
> >
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index eccb48ad42..96aa70aea8 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -157,6 +157,15 @@ static void
> flush_or_purge_queued_packets(NetClientState *nc)
> >   * - we could suppress RX interrupt if we were so inclined.
> >   */
> >
> > +static void virtio_net_rx_notify(void *opaque)
> > +{
> > +    VirtIONet *n = opaque;
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> > +
> > +    n->rx_pkt_cnt = 0;
> > +    virtio_notify(vdev, n->vqs[0].rx_vq);
> > +}
> > +
> >  static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
> >  {
> >      VirtIONet *n = VIRTIO_NET(vdev);
> > @@ -1081,6 +1090,29 @@ static int virtio_net_handle_offloads(VirtIONet
> *n, uint8_t cmd,
> >      }
> >  }
> >
> > +static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
> > +                                  struct iovec *iov, unsigned int
> iov_cnt)
> > +{
> > +    struct virtio_net_ctrl_coal coal;
> > +    size_t s;
> > +
> > +    s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
> > +    if (s != sizeof(coal)) {
> > +        return VIRTIO_NET_ERR;
> > +    }
> > +
> > +    if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
> > +        n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
> > +        n->rx_coal_packets = le32_to_cpu(coal.max_packets);
> > +    } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
> > +        n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
> > +        n->tx_coal_packets = le32_to_cpu(coal.max_packets);
> > +        n->tx_timeout = n->tx_coal_usecs * 1000;
> > +    }
> > +
> > +    return VIRTIO_NET_OK;
> > +}
> > +
> >  static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
> >                                   struct iovec *iov, unsigned int
> iov_cnt)
> >  {
> > @@ -1582,6 +1614,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice
> *vdev,
> >          status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
> >      } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
> >          status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
> > +    } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
> > +        status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
> >      }
> >
> >      s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
> > @@ -2041,7 +2075,18 @@ static ssize_t
> virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
> >      }
> >
> >      virtqueue_flush(q->rx_vq, i);
> > -    virtio_notify(vdev, q->rx_vq);
> > +
> > +    /* rx coalescing */
> > +    n->rx_pkt_cnt += i;
> > +    if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
> > +        timer_del(n->rx_index_timer);
> > +        virtio_net_rx_notify(n);
> > +    } else {
> > +        if (n->rx_pkt_cnt == i) {
>
> Maybe it's better to check if !timer_pending? It wasn't immediately
> clear to me that `n->rx_pkt_cnt == i` until I realized that
> `n->rx_pkt_cnt` is 0 at the beginning of the function for that
> condition to be true.
>
> > +            timer_mod(n->rx_index_timer,
> > +                      qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) +
> n->rx_coal_usecs);
> > +        }
> > +    }
> >
> >      return size;
> >
> > @@ -2900,6 +2945,7 @@ static void virtio_net_tx_timer(void *opaque)
> >      if (ret == -EBUSY || ret == -EINVAL) {
> >          return;
> >      }
> > +    n->tx_pkt_cnt -= ret;
> >      /*
> >       * If we flush a full burst of packets, assume there are
> >       * more coming and immediately rearm
> > @@ -2919,6 +2965,7 @@ static void virtio_net_tx_timer(void *opaque)
> >      ret = virtio_net_flush_tx(q);
> >      if (ret > 0) {
> >          virtio_queue_set_notification(q->tx_vq, 0);
> > +        n->tx_pkt_cnt -= ret;
>
> If we got his way, we must control for underflow here.
>
> But I think it should be n->pt_pkt_cnt = 0, isn't it?
>
> >          q->tx_waiting = 1;
> >          timer_mod(q->tx_timer,
> >                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> n->tx_timeout);
> > @@ -2974,27 +3021,45 @@ static void virtio_net_tx_bh(void *opaque)
> >      }
> >  }
> >
> > -static void virtio_net_add_queue(VirtIONet *n, int index)
> > +static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue
> *vq)
> >  {
> > -    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> > +    VirtIONet *n = VIRTIO_NET(vdev);
> >
> > -    n->vqs[index].rx_vq = virtio_add_queue(vdev,
> n->net_conf.rx_queue_size,
> > -                                           virtio_net_handle_rx);
> > +    n->tx_pkt_cnt++;
> > +    bool use_timer = n->tx_timer_activate || n->tx_coal_usecs > 0 ||
> > +                     n->tx_coal_packets > 0;
>
> No declarations in the middle of the function.
>
> Also, why use_timer = true as long as n->tx_coal_packets > 0?
>
> >
> > -    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
> > -        n->vqs[index].tx_vq =
> > -            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> > -                             virtio_net_handle_tx_timer);
> > -        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> > -                                              virtio_net_tx_timer,
> > -                                              &n->vqs[index]);
> > +    if (use_timer && n->tx_pkt_cnt < n->tx_coal_packets) {
> > +        virtio_net_handle_tx_timer(vdev, vq);
> >      } else {
> > -        n->vqs[index].tx_vq =
> > -            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> > -                             virtio_net_handle_tx_bh);
> > -        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh,
> &n->vqs[index],
> > -
> &DEVICE(vdev)->mem_reentrancy_guard);
> > +        n->tx_pkt_cnt = 0;
> > +        virtio_net_handle_tx_bh(vdev, vq);
> >      }
> > +}
> > +
> > +static void virtio_net_add_queue(VirtIONet *n, int index)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> > +
> > +    n->vqs[index].rx_vq =
> > +        virtio_add_queue(vdev,
> > +                         n->net_conf.rx_queue_size,
> > +                         virtio_net_handle_rx);
> > +
> > +    n->vqs[index].tx_vq =
> > +        virtio_add_queue(vdev,
> > +                         n->net_conf.tx_queue_size,
> > +                         virtio_net_handle_tx_dispatch);
> > +
> > +    n->vqs[index].tx_timer =
> > +        timer_new_ns(QEMU_CLOCK_VIRTUAL,
> > +                     virtio_net_tx_timer,
> > +                     &n->vqs[index]);
>
> We should avoid creating a new timer if it will not be used by
> default. Create it only if cmdline enables it or the guest enable it
> via control vq.
>
> > +
> > +    n->vqs[index].tx_bh =
> > +        qemu_bh_new_guarded(virtio_net_tx_bh,
> > +                            &n->vqs[index],
> > +                            &DEVICE(vdev)->mem_reentrancy_guard);
> >
> >      n->vqs[index].tx_waiting = 0;
> >      n->vqs[index].n = n;
> > @@ -3089,6 +3154,7 @@ static void virtio_net_get_features(VirtIODevice
> *vdev, uint64_t *features,
> >      virtio_features_or(features, features, n->host_features_ex);
> >
> >      virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
> > +    virtio_add_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);
>
> We shouldn't enable VIRTIO_NET_F_NOTF_COAL unconditionally, it should
> be regulated by the cmdline property.
>
> >
> >      if (!peer_has_vnet_hdr(n)) {
> >          virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
> > @@ -3972,6 +4038,10 @@ static void virtio_net_device_realize(DeviceState
> *dev, Error **errp)
> >          error_printf("Defaulting to \"bh\"");
> >      }
> >
> > +    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")) {
> > +        n->tx_timer_activate = true;
> > +    }
> > +
> >      n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
> >                                      n->net_conf.tx_queue_size);
> >
> > @@ -4048,6 +4118,14 @@ static void virtio_net_device_realize(DeviceState
> *dev, Error **errp)
> >              n->rss_data.specified_hash_types.on_bits |
> >              n->rss_data.specified_hash_types.auto_bits;
> >      }
> > +    n->rx_pkt_cnt = 0;
> > +    n->tx_pkt_cnt = 0;
> > +    n->rx_coal_usecs = 0;
> > +    n->tx_coal_usecs = 0;
> > +    n->rx_coal_packets = 0;
> > +    n->tx_coal_packets = 0;
> > +    n->rx_index_timer = timer_new_us(QEMU_CLOCK_VIRTUAL,
> > +                                     virtio_net_rx_notify, n);
>
> Same as the tx timer. Is it worth creating a new timer even if it will
> not be used by default?
>
> >  }
> >
> >  static void virtio_net_device_unrealize(DeviceState *dev)
> > @@ -4262,6 +4340,8 @@ static const Property virtio_net_properties[] = {
> >                        VIRTIO_NET_F_GUEST_USO6, true),
> >      DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
> >                        VIRTIO_NET_F_HOST_USO, true),
> > +    DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
> > +                      VIRTIO_NET_F_NOTF_COAL, true),
> >      DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
> >                                    rss_data.specified_hash_types,
> >                                    VIRTIO_NET_HASH_REPORT_IPv4 - 1,
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c
> b/hw/virtio/vhost-shadow-virtqueue.c
> > index 6242aeb69c..104ed0ce8f 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -37,6 +37,7 @@ bool vhost_svq_valid_features(uint64_t features, Error
> **errp)
> >          case VIRTIO_RING_F_INDIRECT_DESC:
> >              continue;
> >
> > +        case VIRTIO_F_RING_RESET:
>
> This patch does not handle VIRTIO_F_RING_RESET, so this should not be
> enabled.
>
> >          case VIRTIO_F_ACCESS_PLATFORM:
> >              /* SVQ trust in the host's IOMMU to translate addresses */
> >          case VIRTIO_F_VERSION_1:
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 2f8f11df86..8eb9bdb961 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -1541,6 +1541,9 @@ static int vhost_vdpa_get_features(struct
> vhost_dev *dev,
> >      if (ret == 0) {
> >          /* Add SVQ logging capabilities */
> >          *features |= BIT_ULL(VHOST_F_LOG_ALL);
> > +
> > +        /* Add notification coalescing features */
> > +        *features |= BIT_ULL(VIRTIO_NET_F_NOTF_COAL);
> >      }
> >
> >      return ret;
> > diff --git a/include/hw/virtio/virtio-net.h
> b/include/hw/virtio/virtio-net.h
> > index 5b8ab7bda7..024501ed37 100644
> > --- a/include/hw/virtio/virtio-net.h
> > +++ b/include/hw/virtio/virtio-net.h
> > @@ -231,6 +231,14 @@ struct VirtIONet {
> >      struct EBPFRSSContext ebpf_rss;
> >      uint32_t nr_ebpf_rss_fds;
> >      char **ebpf_rss_fds;
> > +    QEMUTimer *rx_index_timer;
> > +    uint32_t rx_coal_usecs;
> > +    uint32_t rx_coal_packets;
> > +    uint32_t rx_pkt_cnt;
> > +    uint32_t tx_coal_usecs;
> > +    uint32_t tx_coal_packets;
> > +    uint32_t tx_pkt_cnt;
> > +    bool tx_timer_activate;
> >  };
> >
> >  size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 3df6091274..6a5d0f4cae 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -61,6 +61,7 @@ static const int vdpa_feature_bits[] = {
> >      VIRTIO_F_NOTIFY_ON_EMPTY,
> >      VIRTIO_F_RING_PACKED,
> >      VIRTIO_F_RING_RESET,
> > +    VIRTIO_F_ORDER_PLATFORM,
> >      VIRTIO_F_VERSION_1,
> >      VIRTIO_F_IN_ORDER,
> >      VIRTIO_F_NOTIFICATION_DATA,
> > @@ -70,6 +71,8 @@ static const int vdpa_feature_bits[] = {
> >      VIRTIO_NET_F_CTRL_RX,
> >      VIRTIO_NET_F_CTRL_RX_EXTRA,
> >      VIRTIO_NET_F_CTRL_VLAN,
> > +    VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +    VIRTIO_NET_F_NOTF_COAL,
>
> Only this feature can be added to vdpa_feature_bits. The rest of them
> must not be added to any other array.
>
> >      VIRTIO_NET_F_CTRL_VQ,
> >      VIRTIO_NET_F_GSO,
> >      VIRTIO_NET_F_GUEST_CSUM,
> > @@ -115,6 +118,13 @@ static const uint64_t vdpa_svq_device_features =
> >      BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
> >      BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
> >      BIT_ULL(VIRTIO_NET_F_STATUS) |
> > +    BIT_ULL(VIRTIO_F_RING_RESET) |
> > +    BIT_ULL(VIRTIO_F_ORDER_PLATFORM) |
> > +    BIT_ULL(VIRTIO_NET_F_GUEST_USO4) |
> > +    BIT_ULL(VIRTIO_NET_F_GUEST_USO6) |
> > +    BIT_ULL(VIRTIO_NET_F_HOST_USO) |
> > +    BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |
> > +    BIT_ULL(VIRTIO_NET_F_NOTF_COAL) |
> >      BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
> >      BIT_ULL(VIRTIO_NET_F_GSO) |
> >      BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
> > --
> > 2.53.0
> >
>
>

[-- Attachment #2: Type: text/html, Size: 19153 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] hw/net/virtio-net: add support for notification coalescing
  2026-03-23  8:38   ` Koushik Dutta
@ 2026-03-23  9:46     ` Eugenio Perez Martin
  0 siblings, 0 replies; 4+ messages in thread
From: Eugenio Perez Martin @ 2026-03-23  9:46 UTC (permalink / raw)
  To: Koushik Dutta
  Cc: qemu-devel, Stefano Garzarella, Michael S. Tsirkin, Jason Wang

On Mon, Mar 23, 2026 at 9:39 AM Koushik Dutta <kdutta@redhat.com> wrote:
>
> Hi Eugenio,
> Regarding the point about not enabling VIRTIO_NET_F_NOTF_COAL unconditionally: since feature negotiation happens during VM bootup and coalescing is configured via ethtool after the VM has already booted, how would we handle this under a conditional property?
>
> If the feature isn't negotiated at the start, we wouldn't be able to enable it dynamically later. Could you clarify how you'd like the command-line regulation to interact with the guest's ability to enable it later?

Replying at the position of the comment, marked with [1]. Please
interleave your reply for the next time, as it makes revision easier
:) [2].

> ------------------------------------------------------------
>
> > @@ -37,6 +37,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
> >          case VIRTIO_RING_F_INDIRECT_DESC:
> >              continue;
> >
> > +        case VIRTIO_F_RING_RESET:
>
> This patch does not handle VIRTIO_F_RING_RESET, so this should not be enabled.
>
> => I included VIRTIO_F_RING_RESET in the valid features list because the vDPA host device offers it by default; without it, SVQ fails the feature negotiation check during initialization.
> qemu-system-x86_64: -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-svq=on: SVQ Invalid device feature flags, offer: 0x1e0010330bfffa7, ok: 0x1e0000330bfffa7
>

Since you're testing the feature with nested virtualization, you can
disable the features in the L0 QEMU with:
-device virtio-net-pci,...,queue_reset=off,iommu_platform=off,guest_uso4=off,guest_uso6=off,host_uso=off,guest_announce=off

> Regards,
> Koushik Dutta
>
> On Thu, Mar 12, 2026 at 3:47 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>>
>> On Wed, Mar 11, 2026 at 2:28 PM Koushik Dutta <kdutta@redhat.com> wrote:
>> >

[...]

>> > +
>> > +static void virtio_net_add_queue(VirtIONet *n, int index)
>> > +{
>> > +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
>> > +
>> > +    n->vqs[index].rx_vq =
>> > +        virtio_add_queue(vdev,
>> > +                         n->net_conf.rx_queue_size,
>> > +                         virtio_net_handle_rx);
>> > +
>> > +    n->vqs[index].tx_vq =
>> > +        virtio_add_queue(vdev,
>> > +                         n->net_conf.tx_queue_size,
>> > +                         virtio_net_handle_tx_dispatch);
>> > +
>> > +    n->vqs[index].tx_timer =
>> > +        timer_new_ns(QEMU_CLOCK_VIRTUAL,
>> > +                     virtio_net_tx_timer,
>> > +                     &n->vqs[index]);
>>
>> We should avoid creating a new timer if it will not be used by
>> default. Create it only if cmdline enables it or the guest enable it
>> via control vq.
>>

[1]

I'm proposing we switch this to something like:

if (!strcmp(n->net_conf.tx, "timer")) {
  n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
virtio_net_tx_timer, &n->vqs[index]);
}

And, in the CVQ code that you created,

if (n->tx_coal_usecs) {
  n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
virtio_net_tx_timer, &n->vqs[index]);
}

And the same applies to RX timers.

[2] https://subspace.kernel.org/etiquette.html#do-not-top-post-when-replying



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-03-23  9:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-11 13:28 [PATCH v2] hw/net/virtio-net: add support for notification coalescing Koushik Dutta
2026-03-12 10:17 ` Eugenio Perez Martin
2026-03-23  8:38   ` Koushik Dutta
2026-03-23  9:46     ` Eugenio Perez Martin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox