* [PATCH v8 1/2] [PATCH 1/2] Introduce virtio_net_handle_tx_dispatch() to unify TX path handling. This dispatcher dynamically selects between timer-based and BH-based TX processing based on configuration.
2026-06-13 22:35 [PATCH v8 0/2] *** virtio-net: Add notification coalescing support Koushik Dutta
@ 2026-06-13 22:35 ` Koushik Dutta
2026-06-13 22:35 ` [PATCH v8 2/2] [PATCH 2/2] Implement VirtIO Network Notification Coalescing (VIRTIO_NET_F_NOTF_COAL). This allows guests to reduce interrupt overhead by configuring coalescing parameters via ethtool -C for both RX and TX paths Koushik Dutta
1 sibling, 0 replies; 4+ messages in thread
From: Koushik Dutta @ 2026-06-13 22:35 UTC (permalink / raw)
To: qemu-devel
Cc: Jason Wang, Stefano Garzarella, Michael S. Tsirkin,
Eugenio Pérez
Previously, the tx property ("timer" or "bh") selected between two
completely separate code paths at queue creation time. This refactoring introduces a
runtime dispatch mechanism while maintaining identical behavior.
This is a preparatory patch with no functional changes, making it
easier to add dynamic TX notification coalescing in a subsequent patch.
Signed-off-by: Koushik Dutta <kdutta@redhat.com>
---
hw/net/virtio-net.c | 44 ++++++++++++++++++++++------------
include/hw/virtio/virtio-net.h | 1 +
2 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 2a5d642a64..319842cf28 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1002,6 +1002,8 @@ static void virtio_net_set_features(VirtIODevice *vdev,
}
}
+static void virtio_net_tx_timer(void *opaque);
+
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
struct iovec *iov, unsigned int iov_cnt)
{
@@ -2817,7 +2819,6 @@ detach:
return -EINVAL;
}
-static void virtio_net_tx_timer(void *opaque);
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
@@ -2973,6 +2974,22 @@ static void virtio_net_tx_bh(void *opaque)
}
}
+static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = VIRTIO_NET(vdev);
+ VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
+
+ if (n->tx_timer_activate) {
+ if (q->tx_timer == NULL) {
+ q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ virtio_net_tx_timer, q);
+ }
+ virtio_net_handle_tx_timer(vdev, vq);
+ } else {
+ virtio_net_handle_tx_bh(vdev, vq);
+ }
+}
+
static void virtio_net_add_queue(VirtIONet *n, int index)
{
VirtIODevice *vdev = VIRTIO_DEVICE(n);
@@ -2980,20 +2997,13 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
virtio_net_handle_rx);
- if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
- n->vqs[index].tx_vq =
- virtio_add_queue(vdev, n->net_conf.tx_queue_size,
- virtio_net_handle_tx_timer);
- n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
- virtio_net_tx_timer,
- &n->vqs[index]);
- } else {
- n->vqs[index].tx_vq =
- virtio_add_queue(vdev, n->net_conf.tx_queue_size,
- virtio_net_handle_tx_bh);
- n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
- &DEVICE(vdev)->mem_reentrancy_guard);
- }
+ n->vqs[index].tx_vq =
+ virtio_add_queue(vdev, n->net_conf.tx_queue_size,
+ virtio_net_handle_tx_dispatch);
+
+ n->vqs[index].tx_bh =
+ qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
+ &DEVICE(vdev)->mem_reentrancy_guard);
n->vqs[index].tx_waiting = 0;
n->vqs[index].n = n;
@@ -3970,6 +3980,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
error_printf("Defaulting to \"bh\"");
}
+ if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") == 0) {
+ n->tx_timer_activate = true;
+ }
+
n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
n->net_conf.tx_queue_size);
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 371e376428..a4eb3f407e 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -230,6 +230,7 @@ struct VirtIONet {
struct EBPFRSSContext ebpf_rss;
uint32_t nr_ebpf_rss_fds;
char **ebpf_rss_fds;
+ bool tx_timer_activate;
};
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v8 2/2] [PATCH 2/2] Implement VirtIO Network Notification Coalescing (VIRTIO_NET_F_NOTF_COAL). This allows guests to reduce interrupt overhead by configuring coalescing parameters via ethtool -C for both RX and TX paths.
2026-06-13 22:35 [PATCH v8 0/2] *** virtio-net: Add notification coalescing support Koushik Dutta
2026-06-13 22:35 ` [PATCH v8 1/2] [PATCH 1/2] Introduce virtio_net_handle_tx_dispatch() to unify TX path handling. This dispatcher dynamically selects between timer-based and BH-based TX processing based on configuration Koushik Dutta
@ 2026-06-13 22:35 ` Koushik Dutta
2026-06-17 1:30 ` [PATCH v8 2/2] Implement VirtIO Network Notification Coalescing Bin Guo
1 sibling, 1 reply; 4+ messages in thread
From: Koushik Dutta @ 2026-06-13 22:35 UTC (permalink / raw)
To: qemu-devel
Cc: Jason Wang, Stefano Garzarella, Michael S. Tsirkin,
Eugenio Pérez
The feature supports two coalescing modes:
- Time-based: delay notifications up to N microseconds
- Count-based: delay until N packets are processed
Implementation details:
- Added VIRTIO_NET_CTRL_NOTF_COAL class handling in control virtqueue
- RX path: batches notifications based on packet count or timeout
- TX path: leverages the unified dispatcher to dynamically enable
timer-based coalescing when the guest configures it via ethtool
- Coalescing parameters persist across live migration
Note: If tx=timer is specified when the VM is launched, the coalescing
feature is not active, because the user has already introduced a 150ms delay.
Signed-off-by: Koushik Dutta <kdutta@redhat.com>
---
hw/net/virtio-net.c | 144 ++++++++++++++++++++++++++++++++-
include/hw/virtio/virtio-net.h | 7 ++
net/passt.c | 1 +
net/tap.c | 1 +
net/vhost-user.c | 1 +
net/vhost-vdpa.c | 1 +
6 files changed, 154 insertions(+), 1 deletion(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 319842cf28..3dbd2f4c11 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -157,6 +157,16 @@ static void flush_or_purge_queued_packets(NetClientState *nc)
* - we could suppress RX interrupt if we were so inclined.
*/
+static void virtio_net_rx_notify(void *opaque)
+{
+ VirtIONetQueue *q = opaque;
+ VirtIONet *n = q->n;
+ VirtIODevice *vdev = VIRTIO_DEVICE(n);
+
+ n->rx_pkt_cnt = 0;
+ virtio_notify(vdev, q->rx_vq);
+}
+
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
VirtIONet *n = VIRTIO_NET(vdev);
@@ -1004,6 +1014,62 @@ static void virtio_net_set_features(VirtIODevice *vdev,
static void virtio_net_tx_timer(void *opaque);
+static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
+ struct iovec *iov, unsigned int iov_cnt)
+{
+ struct virtio_net_ctrl_coal coal;
+ VirtIONetQueue *q;
+ size_t s;
+ int i;
+
+ s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
+ if (s != sizeof(coal)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
+ n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
+ n->rx_coal_packets = le32_to_cpu(coal.max_packets);
+ for (i = 0; i < n->max_queue_pairs; i++) {
+ q = &n->vqs[i];
+ if (n->rx_coal_usecs > 0) {
+ if (!q->rx_timer) {
+ q->rx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ virtio_net_rx_notify,
+ q);
+ }
+ } else {
+ if (q->rx_timer) {
+ timer_free(q->rx_timer);
+ q->rx_timer = NULL;
+ }
+ }
+ }
+ } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
+ n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
+ n->tx_coal_packets = le32_to_cpu(coal.max_packets);
+ /* Converted us to ns */
+ n->tx_timeout = n->tx_coal_usecs * 1000;
+ for (i = 0; i < n->max_queue_pairs; i++) {
+ q = &n->vqs[i];
+ if (n->tx_coal_usecs > 0) {
+ if (!q->tx_timer) {
+ q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ virtio_net_tx_timer,
+ q);
+ }
+ } else {
+ if (q->tx_timer) {
+ timer_free(q->tx_timer);
+ q->tx_timer = NULL;
+ }
+ }
+ }
+ }
+
+ return VIRTIO_NET_OK;
+}
+
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
struct iovec *iov, unsigned int iov_cnt)
{
@@ -1583,6 +1649,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+ } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
+ status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
}
s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
@@ -2042,7 +2110,23 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
}
virtqueue_flush(q->rx_vq, i);
- virtio_notify(vdev, q->rx_vq);
+
+ /* rx coalescing */
+ n->rx_pkt_cnt += i;
+ if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
+ if (q->rx_timer) {
+ timer_del(q->rx_timer);
+ }
+ virtio_net_rx_notify(q);
+ } else {
+ if (q->rx_timer) {
+ if (!timer_pending(q->rx_timer)) {
+ timer_mod(q->rx_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+ n->rx_coal_usecs * 1000);
+ }
+ }
+ }
return size;
@@ -2900,6 +2984,12 @@ static void virtio_net_tx_timer(void *opaque)
if (ret == -EBUSY || ret == -EINVAL) {
return;
}
+ if (n->tx_pkt_cnt < ret) {
+ n->tx_pkt_cnt = 0;
+ } else {
+ n->tx_pkt_cnt -= ret;
+ }
+
/*
* If we flush a full burst of packets, assume there are
* more coming and immediately rearm
@@ -2919,6 +3009,7 @@ static void virtio_net_tx_timer(void *opaque)
ret = virtio_net_flush_tx(q);
if (ret > 0) {
virtio_queue_set_notification(q->tx_vq, 0);
+ n->tx_pkt_cnt -= ret;
q->tx_waiting = 1;
timer_mod(q->tx_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
@@ -2985,6 +3076,20 @@ static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
virtio_net_tx_timer, q);
}
virtio_net_handle_tx_timer(vdev, vq);
+ } else if (n->tx_coal_usecs > 0) {
+ n->tx_pkt_cnt++;
+ if (n->tx_pkt_cnt < n->tx_coal_packets) {
+ if (q->tx_timer) {
+ virtio_net_handle_tx_timer(vdev, vq);
+ return;
+ }
+ } else {
+ n->tx_pkt_cnt = 0;
+ if (q->tx_timer) {
+ timer_del(q->tx_timer);
+ }
+ virtio_net_handle_tx_bh(vdev, vq);
+ }
} else {
virtio_net_handle_tx_bh(vdev, vq);
}
@@ -3098,6 +3203,9 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
virtio_features_or(features, features, n->host_features_ex);
virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
+ if (n->tx_timer_activate) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);
+ }
if (!peer_has_vnet_hdr(n)) {
virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
@@ -3252,6 +3360,29 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
}
virtio_net_commit_rss_config(n);
+ if (n->tx_coal_usecs > 0 || n->rx_coal_usecs > 0) {
+
+ for (i = 0; i < n->max_queue_pairs; i++) {
+ VirtIONetQueue *q = &n->vqs[i];
+ if (n->rx_coal_usecs > 0) {
+ if (!q->rx_timer) {
+ q->rx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ virtio_net_rx_notify,
+ q);
+ }
+ }
+
+ if (n->tx_coal_usecs > 0) {
+ n->tx_timeout = n->tx_coal_usecs * 1000;
+ if (!q->tx_timer) {
+ q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ virtio_net_tx_timer,
+ q);
+ }
+ }
+ }
+ }
+
return 0;
}
@@ -3627,6 +3758,10 @@ static const VMStateDescription vmstate_virtio_net_device = {
vmstate_virtio_net_tx_waiting),
VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
has_ctrl_guest_offloads),
+ VMSTATE_UINT32(rx_coal_usecs, VirtIONet),
+ VMSTATE_UINT32(tx_coal_usecs, VirtIONet),
+ VMSTATE_UINT32(rx_coal_packets, VirtIONet),
+ VMSTATE_UINT32(tx_coal_packets, VirtIONet),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * const []) {
@@ -4060,6 +4195,11 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
n->rss_data.specified_hash_types.on_bits |
n->rss_data.specified_hash_types.auto_bits;
}
+ n->rx_pkt_cnt = 0;
+ n->tx_pkt_cnt = 0;
+ n->rx_coal_usecs = 0;
+ n->rx_coal_packets = 0;
+ n->tx_coal_packets = 0;
}
static void virtio_net_device_unrealize(DeviceState *dev)
@@ -4272,6 +4412,8 @@ static const Property virtio_net_properties[] = {
VIRTIO_NET_F_GUEST_USO6, true),
DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
VIRTIO_NET_F_HOST_USO, true),
+ DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
+ VIRTIO_NET_F_NOTF_COAL, true),
DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
rss_data.specified_hash_types,
VIRTIO_NET_HASH_REPORT_IPv4 - 1,
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index a4eb3f407e..9612416622 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -159,6 +159,7 @@ typedef struct VirtIONetQueue {
VirtQueue *rx_vq;
VirtQueue *tx_vq;
QEMUTimer *tx_timer;
+ QEMUTimer *rx_timer;
QEMUBH *tx_bh;
uint32_t tx_waiting;
struct {
@@ -230,6 +231,12 @@ struct VirtIONet {
struct EBPFRSSContext ebpf_rss;
uint32_t nr_ebpf_rss_fds;
char **ebpf_rss_fds;
+ uint32_t rx_coal_usecs; /* RX interrupt coalescing timeout (microseconds) */
+ uint32_t rx_coal_packets; /* RX packet count threshold for coalescing */
+ uint32_t rx_pkt_cnt; /* Current RX packet count since last notification */
+ uint32_t tx_coal_usecs; /* TX interrupt coalescing timeout (microseconds) */
+ uint32_t tx_coal_packets; /* TX packet count threshold for coalescing */
+ uint32_t tx_pkt_cnt; /* Current TX packet count since last notification */
bool tx_timer_activate;
};
diff --git a/net/passt.c b/net/passt.c
index 45440c399b..43b36ed8c5 100644
--- a/net/passt.c
+++ b/net/passt.c
@@ -52,6 +52,7 @@ static const int user_feature_bits[] = {
VIRTIO_NET_F_GUEST_USO4,
VIRTIO_NET_F_GUEST_USO6,
VIRTIO_NET_F_HOST_USO,
+ VIRTIO_NET_F_NOTF_COAL,
/* This bit implies RARP isn't sent by QEMU out of band */
VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/net/tap.c b/net/tap.c
index 57ffb09885..e6ddbc1eb1 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -63,6 +63,7 @@ static const int kernel_feature_bits[] = {
VIRTIO_F_NOTIFICATION_DATA,
VIRTIO_NET_F_RSC_EXT,
VIRTIO_NET_F_HASH_REPORT,
+ VIRTIO_NET_F_NOTF_COAL,
VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
VHOST_INVALID_FEATURE_BIT
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 2d0fc49b4d..f1e9b7a038 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -54,6 +54,7 @@ static const int user_feature_bits[] = {
VIRTIO_NET_F_GUEST_USO4,
VIRTIO_NET_F_GUEST_USO6,
VIRTIO_NET_F_HOST_USO,
+ VIRTIO_NET_F_NOTF_COAL,
/* This bit implies RARP isn't sent by QEMU out of band */
VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index f1523697e2..0dcd6fb9f1 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -70,6 +70,7 @@ static const int vdpa_feature_bits[] = {
VIRTIO_NET_F_CTRL_RX,
VIRTIO_NET_F_CTRL_RX_EXTRA,
VIRTIO_NET_F_CTRL_VLAN,
+ VIRTIO_NET_F_NOTF_COAL,
VIRTIO_NET_F_CTRL_VQ,
VIRTIO_NET_F_GSO,
VIRTIO_NET_F_GUEST_CSUM,
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread