Re: [PATCH v3] hw/net/virtio-net: add support for notification coalescing

public inbox for qemu-devel@nongnu.org
 help / color / mirror / Atom feed

From: Eugenio Perez Martin <eperezma@redhat.com>
To: Koushik Dutta <kdutta@redhat.com>
Cc: qemu-devel@nongnu.org, Stefano Garzarella <sgarzare@redhat.com>,
	 Jason Wang <jasowang@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>
Subject: Re: [PATCH v3] hw/net/virtio-net: add support for notification coalescing
Date: Tue, 24 Mar 2026 10:15:34 +0100	[thread overview]
Message-ID: <CAJaqyWfgQVJF3tmXDQ2vjgth1X6P3rVPn0f+LygJERqWb733NA@mail.gmail.com> (raw)
In-Reply-To: <20260322195624.381837-1-kdutta@redhat.com>

On Sun, Mar 22, 2026 at 8:56 PM Koushik Dutta <kdutta@redhat.com> wrote:
>
> Implement VirtIO Network Notification Coalescing (Bit 53).
> This allows the guest to manage interrupt frequency using ethtool
> -C for both RX and TX paths.
>
> - Added VIRTIO_NET_F_NOTF_COAL to host features.
> - Implemented VIRTIO_NET_CTRL_NOTF_COAL class handling in
>   virtio_net_handle_ctrl_iov.
> - Added logic to store and apply rx/tx usecs and max_packets.
> - Added packet counters and threshold logic for both RX and TX data paths.
> - Dynamic Dispatcher: Implemented a dispatcher mechanism that
>   dynamically switches/activates the notification callback logic
>   only after the guest enables TX coalescing via ethtool.
>
> vhost-vdpa: add support for SVQ interrupt coalescing.
>
> This reduces interrupt overhead by batching notifications based on
> either a packet count or a time-based threshold.
>
> Signed-off-by: Koushik Dutta <kdutta@redhat.com>
> ---
>  hw/net/virtio-net.c                | 133 +++++++++++++++++++++++++----
>  hw/virtio/vhost-shadow-virtqueue.c |   1 +
>  hw/virtio/vhost-vdpa.c             |   3 +
>  include/hw/virtio/virtio-net.h     |   8 ++
>  net/vhost-vdpa.c                   |   7 ++
>  5 files changed, 135 insertions(+), 17 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index eccb48ad42..5fb61944f1 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -157,6 +157,15 @@ static void flush_or_purge_queued_packets(NetClientState *nc)
>   * - we could suppress RX interrupt if we were so inclined.
>   */
>
> +static void virtio_net_rx_notify(void *opaque)
> +{
> +    VirtIONet *n = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +
> +    n->rx_pkt_cnt = 0;
> +    virtio_notify(vdev, n->vqs[0].rx_vq);
> +}
> +
>  static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
>  {
>      VirtIONet *n = VIRTIO_NET(vdev);
> @@ -1081,6 +1090,33 @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
>      }
>  }
>
> +static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
> +                                  struct iovec *iov, unsigned int iov_cnt)
> +{
> +    struct virtio_net_ctrl_coal coal;
> +    size_t s;
> +
> +    s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
> +    if (s != sizeof(coal)) {
> +        return VIRTIO_NET_ERR;
> +    }
> +
> +    if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
> +        n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
> +        n->rx_coal_packets = le32_to_cpu(coal.max_packets);
> +        if (!n->rx_index_timer) {
> +            n->rx_index_timer = timer_new_us(QEMU_CLOCK_VIRTUAL,
> +                                             virtio_net_rx_notify, n);
> +        }
> +    } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
> +        n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
> +        n->tx_coal_packets = le32_to_cpu(coal.max_packets);
> +        n->tx_timeout = n->tx_coal_usecs * 1000;
> +    }
> +
> +    return VIRTIO_NET_OK;
> +}
> +
>  static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
>                                   struct iovec *iov, unsigned int iov_cnt)
>  {
> @@ -1582,6 +1618,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
>          status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
>      } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
>          status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
> +    } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
> +        status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
>      }
>
>      s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
> @@ -2041,7 +2079,24 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
>      }
>
>      virtqueue_flush(q->rx_vq, i);
> -    virtio_notify(vdev, q->rx_vq);
> +
> +    /* rx coalescing */
> +    n->rx_pkt_cnt += i;
> +    if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
> +        if (n->rx_index_timer) {
> +            timer_del(n->rx_index_timer);
> +        }
> +        virtio_net_rx_notify(n);
> +    } else {
> +        if (!n->rx_index_timer) {
> +            n->rx_index_timer = timer_new_us(QEMU_CLOCK_VIRTUAL,
> +                                             virtio_net_rx_notify, n);

Let's move the creation of the timer to the CVQ handling, so the
malloc is out of the datapath.

> +        }
> +        if (!timer_pending(n->rx_index_timer)) {
> +            timer_mod(n->rx_index_timer,
> +                      qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + n->rx_coal_usecs);
> +        }
> +    }
>
>      return size;
>
> @@ -2900,6 +2955,12 @@ static void virtio_net_tx_timer(void *opaque)
>      if (ret == -EBUSY || ret == -EINVAL) {
>          return;
>      }
> +    if (n->tx_pkt_cnt < ret) {
> +        n->tx_pkt_cnt = 0;
> +    } else {
> +        n->tx_pkt_cnt -= ret;
> +    }
> +
>      /*
>       * If we flush a full burst of packets, assume there are
>       * more coming and immediately rearm
> @@ -2919,6 +2980,7 @@ static void virtio_net_tx_timer(void *opaque)
>      ret = virtio_net_flush_tx(q);
>      if (ret > 0) {
>          virtio_queue_set_notification(q->tx_vq, 0);
> +        n->tx_pkt_cnt -= ret;
>          q->tx_waiting = 1;
>          timer_mod(q->tx_timer,
>                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> @@ -2974,27 +3036,50 @@ static void virtio_net_tx_bh(void *opaque)
>      }
>  }
>
> +static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    VirtIONet *n = VIRTIO_NET(vdev);
> +    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
> +    bool use_timer = n->tx_timer_activate || n->tx_coal_usecs > 0 ||
> +                     n->tx_coal_packets > 0;
> +    bool pkt_limit = (n->tx_coal_packets > 0);
> +
> +    n->tx_pkt_cnt++;
> +    if (use_timer) {
> +        if (!pkt_limit || n->tx_pkt_cnt < n->tx_coal_packets) {
> +            if (!q->tx_timer) {
> +                q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> +                                           virtio_net_tx_timer,
> +                                           q);

Same here, let's create in either CVQ handling or the queue creation.

Combining the "tx_timer" command-line argument and the guest's
handling of coalescing is difficult. Maybe we should forbid enabling
both of them simultaneously? Jason, MST, what do you think?

In my opinion it would be:
* By default, the virtio feature is enabled and the tx_timer is false.
* If you want to enable only host side coalescing with tx_timer, you
must explicitly set vq_notf_coal feature to off.

But this causes you to need to change the command line if you spawn a
new QEMU instance with the old cmdline parameters if you have
tx_timer=N. Live migration should be ok since `vq_notf_coal` is set to
off from the previous machine type.

That's something that needs to be added in this patch.
hw/core/machine.c changes to know that old machines have that off,
like the commit 1c79ab6937ae938d3dfd4da1c01afc7eb599857e ("virtio-net:
Advertise UDP tunnel GSO support by default") for example.

> +            }
> +            virtio_net_handle_tx_timer(vdev, vq);
> +        } else {
> +            n->tx_pkt_cnt = 0;
> +            virtio_net_handle_tx_bh(vdev, vq);
> +        }
> +    } else {
> +        virtio_net_handle_tx_bh(vdev, vq);
> +    }
> +}
> +
>  static void virtio_net_add_queue(VirtIONet *n, int index)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(n);
>
> -    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
> -                                           virtio_net_handle_rx);
> +    n->vqs[index].rx_vq =
> +        virtio_add_queue(vdev,
> +                         n->net_conf.rx_queue_size,
> +                         virtio_net_handle_rx);

This is just a formatting change, let's omit it so the patch contains
the fewest lines changed possible.

>
> -    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
> -        n->vqs[index].tx_vq =
> -            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> -                             virtio_net_handle_tx_timer);
> -        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> -                                              virtio_net_tx_timer,
> -                                              &n->vqs[index]);
> -    } else {
> -        n->vqs[index].tx_vq =
> -            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> -                             virtio_net_handle_tx_bh);
> -        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
> -                                                  &DEVICE(vdev)->mem_reentrancy_guard);
> -    }
> +    n->vqs[index].tx_vq =
> +        virtio_add_queue(vdev,
> +                         n->net_conf.tx_queue_size,
> +                         virtio_net_handle_tx_dispatch);
> +
> +    n->vqs[index].tx_bh =
> +        qemu_bh_new_guarded(virtio_net_tx_bh,
> +                            &n->vqs[index],
> +                            &DEVICE(vdev)->mem_reentrancy_guard);
>
>      n->vqs[index].tx_waiting = 0;
>      n->vqs[index].n = n;
> @@ -3089,6 +3174,7 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
>      virtio_features_or(features, features, n->host_features_ex);
>
>      virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
> +    virtio_add_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);

This adds the feature unconditionally even if "vq_notf_coal" is set to
off. This change needs to be removed.

>
>      if (!peer_has_vnet_hdr(n)) {
>          virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
> @@ -3972,6 +4058,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>          error_printf("Defaulting to \"bh\"");
>      }
>
> +    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")) {
> +        n->tx_timer_activate = true;
> +    }
> +
>      n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
>                                      n->net_conf.tx_queue_size);
>
> @@ -4048,6 +4138,13 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>              n->rss_data.specified_hash_types.on_bits |
>              n->rss_data.specified_hash_types.auto_bits;
>      }
> +    n->rx_pkt_cnt = 0;
> +    n->tx_pkt_cnt = 0;
> +    n->rx_coal_usecs = 0;
> +    n->tx_coal_usecs = 0;
> +    n->rx_coal_packets = 0;
> +    n->tx_coal_packets = 0;
> +    n->rx_index_timer = NULL;
>  }
>
>  static void virtio_net_device_unrealize(DeviceState *dev)
> @@ -4262,6 +4359,8 @@ static const Property virtio_net_properties[] = {
>                        VIRTIO_NET_F_GUEST_USO6, true),
>      DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
>                        VIRTIO_NET_F_HOST_USO, true),
> +    DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
> +                      VIRTIO_NET_F_NOTF_COAL, true),
>      DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
>                                    rss_data.specified_hash_types,
>                                    VIRTIO_NET_HASH_REPORT_IPv4 - 1,
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 6242aeb69c..104ed0ce8f 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -37,6 +37,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
>          case VIRTIO_RING_F_INDIRECT_DESC:
>              continue;
>
> +        case VIRTIO_F_RING_RESET:

This patch does not implement the reset feature in SVQ, so this change
needs to be removed.

>          case VIRTIO_F_ACCESS_PLATFORM:
>              /* SVQ trust in the host's IOMMU to translate addresses */
>          case VIRTIO_F_VERSION_1:
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 2f8f11df86..8eb9bdb961 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -1541,6 +1541,9 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
>      if (ret == 0) {
>          /* Add SVQ logging capabilities */
>          *features |= BIT_ULL(VHOST_F_LOG_ALL);
> +
> +        /* Add notification coalescing features */
> +        *features |= BIT_ULL(VIRTIO_NET_F_NOTF_COAL);

Same here, this change does not add the notif_coal features to all
existing vdpa devices. So this change needs to be removed.

>      }
>
>      return ret;
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 5b8ab7bda7..024501ed37 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -231,6 +231,14 @@ struct VirtIONet {
>      struct EBPFRSSContext ebpf_rss;
>      uint32_t nr_ebpf_rss_fds;
>      char **ebpf_rss_fds;
> +    QEMUTimer *rx_index_timer;
> +    uint32_t rx_coal_usecs;
> +    uint32_t rx_coal_packets;
> +    uint32_t rx_pkt_cnt;
> +    uint32_t tx_coal_usecs;
> +    uint32_t tx_coal_packets;
> +    uint32_t tx_pkt_cnt;
> +    bool tx_timer_activate;
>  };
>
>  size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 3df6091274..3fef894d8d 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -70,6 +70,7 @@ static const int vdpa_feature_bits[] = {
>      VIRTIO_NET_F_CTRL_RX,
>      VIRTIO_NET_F_CTRL_RX_EXTRA,
>      VIRTIO_NET_F_CTRL_VLAN,
> +    VIRTIO_NET_F_NOTF_COAL,
>      VIRTIO_NET_F_CTRL_VQ,
>      VIRTIO_NET_F_GSO,
>      VIRTIO_NET_F_GUEST_CSUM,
> @@ -115,6 +116,12 @@ static const uint64_t vdpa_svq_device_features =
>      BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
>      BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
>      BIT_ULL(VIRTIO_NET_F_STATUS) |
> +    BIT_ULL(VIRTIO_F_RING_RESET) |
> +    BIT_ULL(VIRTIO_NET_F_GUEST_USO4) |
> +    BIT_ULL(VIRTIO_NET_F_GUEST_USO6) |
> +    BIT_ULL(VIRTIO_NET_F_HOST_USO) |
> +    BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |
> +    BIT_ULL(VIRTIO_NET_F_NOTF_COAL) |

The same applies to all these features.

>      BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
>      BIT_ULL(VIRTIO_NET_F_GSO) |
>      BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
> --
> 2.53.0
>

     prev parent reply	other threads:[~2026-03-24  9:16 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-22 19:56 [PATCH v3] hw/net/virtio-net: add support for notification coalescing Koushik Dutta
2026-03-24  9:15 ` Eugenio Perez Martin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAJaqyWfgQVJF3tmXDQ2vjgth1X6P3rVPn0f+LygJERqWb733NA@mail.gmail.com \
    --to=eperezma@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=kdutta@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=sgarzare@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox