From: Anthony Liguori
Subject: Re: [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer
Date: Thu, 30 Oct 2008 14:24:31 -0500
Message-ID: <490A09EF.2030006@codemonkey.ws>
In-Reply-To: <1225389113-28332-6-git-send-email-markmc@redhat.com>
References: <1225389113-28332-1-git-send-email-markmc@redhat.com>
 <1225389113-28332-2-git-send-email-markmc@redhat.com>
 <1225389113-28332-3-git-send-email-markmc@redhat.com>
 <1225389113-28332-4-git-send-email-markmc@redhat.com>
 <1225389113-28332-5-git-send-email-markmc@redhat.com>
 <1225389113-28332-6-git-send-email-markmc@redhat.com>
To: Mark McLoughlin
Cc: Avi Kivity, kvm@vger.kernel.org

Mark McLoughlin wrote:
> By removing the tx timer altogether and doing all the copies in the
> I/O thread, we can keep the I/O churning away in parallel with the
> guest generating more I/O.
>
> In my tests, this significantly increases guest->host throughput,
> causes a minor increase in host->guest throughput, reduces CPU
> utilization somewhat and greatly reduces roundtrip times.
>
> Even aside from the benchmark results, removing the arbitrary 150us
> timer is a nicer option than coming up with a heuristic to make it
> vary according to load. Finally, on kernels which don't have a
> suitably low posix timer latency, we won't be scuppered by effectively
> having e.g. a 1ms timer.
>
> Note, this highlights that the I/O thread may become a scalability
> concern and we might want to consider e.g. an I/O thread per device.
>
> Note also that when tuning for a specific workload, which CPU
> the I/O thread is pinned to is important.

Instead of using an eventfd, perhaps you could just schedule a bottom half? Bottom halves already run in the I/O thread, so you'd get the same deferral without the extra descriptor pair, the non-blocking fcntl() setup, or the cleanup in virtio_net_uninit(). I think that would be a whole lot cleaner.
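Something like this, perhaps (untested sketch, just to illustrate; I'm assuming the stock qemu_bh_new()/qemu_bh_schedule() helpers and a new "QEMUBH *tx_bh" field in VirtIONet in place of the tx_eventfds pair):

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONet *n = opaque;

    /* Mirrors virtio_net_tx_event(): if the queue was empty, re-enable
     * guest notifications; otherwise stay in polling mode and run the
     * bottom half again from the I/O thread. */
    if (!virtio_net_flush_tx(n, n->tx_vq))
        n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
    else
        qemu_bh_schedule(n->tx_bh);
}

static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Suppress further kicks and defer the flush to the bottom half. */
    vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
    qemu_bh_schedule(n->tx_bh);
}

with virtio_net_init() just doing:

    n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);

Scheduling an already-scheduled bottom half is a no-op, so repeated kicks coalesce for free, and there's no fd to make non-blocking, register a handler for, or close on uninit.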
Regards,

Anthony Liguori

> Signed-off-by: Mark McLoughlin
> ---
>  qemu/hw/virtio-net.c |   79 ++++++++++++++++++++++++++++---------------------
>  1 files changed, 45 insertions(+), 34 deletions(-)
>
> diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
> index bc2ede6..0612f5f 100644
> --- a/qemu/hw/virtio-net.c
> +++ b/qemu/hw/virtio-net.c
> @@ -15,6 +15,8 @@
>  #include "net.h"
>  #include "qemu-timer.h"
>  #include "qemu-kvm.h"
> +#include "qemu-char.h"
> +#include "compatfd.h"
>
>  /* from Linux's virtio_net.h */
>
> @@ -35,8 +37,6 @@
>  #define VIRTIO_NET_F_HOST_ECN  13 /* Host can handle TSO[6] w/ ECN in. */
>  #define VIRTIO_NET_F_HOST_UFO  14 /* Host can handle UFO in. */
>
> -#define TX_TIMER_INTERVAL 150000 /* 150 us */
> -
>  /* The config defining mac address (6 bytes) */
>  struct virtio_net_config
>  {
> @@ -68,8 +68,7 @@ typedef struct VirtIONet
>      VirtQueue *rx_vq;
>      VirtQueue *tx_vq;
>      VLANClientState *vc;
> -    QEMUTimer *tx_timer;
> -    int tx_timer_active;
> +    int tx_eventfds[2];
>  } VirtIONet;
>
>  /* TODO
> @@ -227,13 +226,14 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
>  }
>
>  /* TX */
> -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
> +static int virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>  {
>      VirtQueueElement elem;
>      int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
> +    int num_packets = 0;
>
>      if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> +        return num_packets;
>
>      while (virtqueue_pop(vq, &elem)) {
>          ssize_t len = 0;
>
> @@ -256,38 +256,31 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>
>          virtqueue_push(vq, &elem, len);
>          virtio_notify(&n->vdev, vq);
> +
> +        num_packets++;
>      }
> +
> +    return num_packets;
>  }
>
>  static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
>
> -    if (n->tx_timer_active) {
> -        vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> -        qemu_del_timer(n->tx_timer);
> -        n->tx_timer_active = 0;
> -        virtio_net_flush_tx(n, vq);
> -    } else {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> -        n->tx_timer_active = 1;
> -        vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
> -    }
> +    vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
> +    qemu_eventfd_write(n->tx_eventfds[1], 1);
>  }
>
> -static void virtio_net_tx_timer(void *opaque)
> +static void virtio_net_tx_event(void *opaque)
>  {
>      VirtIONet *n = opaque;
>
> -    n->tx_timer_active = 0;
> +    qemu_eventfd_read(n->tx_eventfds[0]);
>
> -    /* Just in case the driver is not ready on more */
> -    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> -
> -    n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> -    virtio_net_flush_tx(n, n->tx_vq);
> +    if (!virtio_net_flush_tx(n, n->tx_vq))
> +        n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> +    else
> +        qemu_eventfd_write(n->tx_eventfds[1], 1);
>  }
>
>  static void virtio_net_save(QEMUFile *f, void *opaque)
> @@ -297,7 +290,6 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
>      virtio_save(&n->vdev, f);
>
>      qemu_put_buffer(f, n->mac, 6);
> -    qemu_put_be32(f, n->tx_timer_active);
>  }
>
>  static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> @@ -310,12 +302,16 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>      virtio_load(&n->vdev, f);
>
>      qemu_get_buffer(f, n->mac, 6);
> -    n->tx_timer_active = qemu_get_be32(f);
>
> -    if (n->tx_timer_active) {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> -    }
> +    return 0;
> +}
> +
> +static int virtio_net_uninit(PCIDevice *dev)
> +{
> +    VirtIONet *n = (VirtIONet *)dev;
> +
> +    close(n->tx_eventfds[0]);
> +    close(n->tx_eventfds[1]);
>
>      return 0;
>  }
> @@ -324,13 +320,23 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
>  {
>      VirtIONet *n;
>      static int virtio_net_id;
> +    int eventfds[2];
> +
> +    if (qemu_eventfd(eventfds) == -1) {
> +        fprintf(stderr, "Failed to create eventfds : %s\n",
> +                strerror(errno));
> +        return NULL;
> +    }
>
>      n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
>                                       0, VIRTIO_ID_NET,
>                                       0x02, 0x00, 0x00,
>                                       6, sizeof(VirtIONet));
> -    if (!n)
> +    if (!n) {
> +        close(eventfds[0]);
> +        close(eventfds[1]);
>          return NULL;
> +    }
>
>      n->vdev.get_config = virtio_net_update_config;
>      n->vdev.get_features = virtio_net_get_features;
> @@ -341,8 +347,13 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
>      n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
>                                   virtio_net_can_receive, n);
>
> -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> -    n->tx_timer_active = 0;
> +    fcntl(eventfds[0], F_SETFL, O_NONBLOCK);
> +    n->tx_eventfds[0] = eventfds[0];
> +    fcntl(eventfds[1], F_SETFL, O_NONBLOCK);
> +    n->tx_eventfds[1] = eventfds[1];
> +
> +    n->vdev.pci_dev.unregister = virtio_net_uninit;
> +    qemu_set_fd_handler2(n->tx_eventfds[0], NULL, virtio_net_tx_event, NULL, n);
>
>      register_savevm("virtio-net", virtio_net_id++, 1,
>                      virtio_net_save, virtio_net_load, n);
>