* [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop @ 2011-03-17 0:12 Shirley Ma 2011-03-17 5:02 ` Michael S. Tsirkin ` (2 more replies) 0 siblings, 3 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-17 0:12 UTC (permalink / raw) To: Michael S. Tsirkin, Rusty Russell; +Cc: David Miller, kvm, netdev Signed-off-by: Shirley Ma <xma@us.ibm.com> --- drivers/net/virtio_net.c | 39 ++++++++++++++++++++------------------- 1 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 82dba5a..c603daa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_net.h> +#include <linux/virtio_ring.h> #include <linux/scatterlist.h> #include <linux/if_vlan.h> #include <linux/slab.h> @@ -509,19 +510,18 @@ again: return received; } -static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) +static void free_old_xmit_skbs(struct virtnet_info *vi) { struct sk_buff *skb; - unsigned int len, tot_sgs = 0; + unsigned int len; while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; - tot_sgs += skb_vnet_hdr(skb)->num_sg; dev_kfree_skb_any(skb); } - return tot_sgs; + return; } static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) @@ -574,11 +574,26 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + bool drop; + bool indirect = virtio_has_feature(vi->vdev, + VIRTIO_RING_F_INDIRECT_DESC); int capacity; /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(vi); - + capacity = virtqueue_get_capacity(vi->svq); + + /* Drop packet instead of stop queue for better performance */ + drop = (capacity == 0 && indirect) || + ((capacity < MAX_SKB_FRAGS + 2) && !indirect); + if (unlikely(drop)) { + dev->stats.tx_fifo_errors++; + dev_warn(&dev->dev, "Unexpected TX queue failure: %d\n", + capacity); + dev->stats.tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; + } /* Try to transmit */ capacity = xmit_skb(vi, skb); @@ -605,20 +620,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) skb_orphan(skb); nf_reset(skb); - /* Apparently nice girls don't return TX_BUSY; stop the queue - * before it gets out of hand. Naturally, this wastes entries. */ - if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { - /* More just got used, free them then recheck. */ - capacity += free_old_xmit_skbs(vi); - if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); - virtqueue_disable_cb(vi->svq); - } - } - } - return NETDEV_TX_OK; } ^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 0:12 [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop Shirley Ma @ 2011-03-17 5:02 ` Michael S. Tsirkin 2011-03-17 15:18 ` Shirley Ma 2011-03-17 5:10 ` Rusty Russell 2011-03-18 13:33 ` Herbert Xu 2 siblings, 1 reply; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-17 5:02 UTC (permalink / raw) To: Shirley Ma; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Wed, Mar 16, 2011 at 05:12:55PM -0700, Shirley Ma wrote: > Signed-off-by: Shirley Ma <xma@us.ibm.com> > --- > drivers/net/virtio_net.c | 39 ++++++++++++++++++++------------------- > 1 files changed, 20 insertions(+), 19 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 82dba5a..c603daa 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -23,6 +23,7 @@ > #include <linux/module.h> > #include <linux/virtio.h> > #include <linux/virtio_net.h> > +#include <linux/virtio_ring.h> > #include <linux/scatterlist.h> > #include <linux/if_vlan.h> > #include <linux/slab.h> > @@ -509,19 +510,18 @@ again: > return received; > } > > -static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) > +static void free_old_xmit_skbs(struct virtnet_info *vi) > { > struct sk_buff *skb; > - unsigned int len, tot_sgs = 0; > + unsigned int len; > > while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { > pr_debug("Sent skb %p\n", skb); > vi->dev->stats.tx_bytes += skb->len; > vi->dev->stats.tx_packets++; > - tot_sgs += skb_vnet_hdr(skb)->num_sg; > dev_kfree_skb_any(skb); > } > - return tot_sgs; > + return; > } > > static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) > @@ -574,11 +574,26 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) > static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > { > struct virtnet_info *vi = netdev_priv(dev); > + bool drop; > + bool indirect = virtio_has_feature(vi->vdev, > + VIRTIO_RING_F_INDIRECT_DESC); > int capacity; > > /* Free up any pending old buffers before queueing new ones. */ > free_old_xmit_skbs(vi); > - > + capacity = virtqueue_get_capacity(vi->svq); > + > + /* Drop packet instead of stop queue for better performance */ > + drop = (capacity == 0 && indirect) || > + ((capacity < MAX_SKB_FRAGS + 2) && !indirect); > + if (unlikely(drop)) { > + dev->stats.tx_fifo_errors++; > + dev_warn(&dev->dev, "Unexpected TX queue failure: %d\n", > + capacity); > + dev->stats.tx_dropped++; > + kfree_skb(skb); > + return NETDEV_TX_OK; > + } > /* Try to transmit */ > capacity = xmit_skb(vi, skb); So, this just tries to make sure there's enough space for max packet in the ring, if not - drop and return OK. Why bother checking beforehand though? If that's what we want to do, we can just call add_buf and see if it fails? > @@ -605,20 +620,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > skb_orphan(skb); > nf_reset(skb); > > - /* Apparently nice girls don't return TX_BUSY; stop the queue > - * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!virtqueue_enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. 
*/ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - virtqueue_disable_cb(vi->svq); > - } > - } > - } > - > return NETDEV_TX_OK; > } > > ^ permalink raw reply [flat|nested] 24+ messages in thread
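For illustration, the alternative Michael is suggesting here would look roughly like the sketch below, assuming xmit_skb() simply propagates the virtqueue_add_buf_gfp() return value (negative on a full ring); this is not the posted patch, just one way the drop-on-add_buf-failure path could be written:

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int err;

	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);

	/* Try to transmit; a negative return means the ring was full. */
	err = xmit_skb(vi, skb);
	if (unlikely(err < 0)) {
		/* Drop instead of returning NETDEV_TX_BUSY or stopping the queue. */
		dev->stats.tx_fifo_errors++;
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	virtqueue_kick(vi->svq);

	/* Don't wait up for transmitted skbs to be freed. */
	skb_orphan(skb);
	nf_reset(skb);

	return NETDEV_TX_OK;
}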
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 5:02 ` Michael S. Tsirkin @ 2011-03-17 15:18 ` Shirley Ma 2011-03-18 3:28 ` Shirley Ma 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-17 15:18 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Thu, 2011-03-17 at 07:02 +0200, Michael S. Tsirkin wrote: > So, this just tries to make sure there's enough space for > max packet in the ring, if not - drop and return OK. > Why bother checking beforehand though? > If that's what we want to do, we can just call add_buf and see > if it fails? In add_buf, there is an additional kick, see below. I added check capacity to avoid this, thought it would be better performance. I will retest it w/i add_buf to see the performance difference. if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ if (out) vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 15:18 ` Shirley Ma @ 2011-03-18 3:28 ` Shirley Ma 2011-03-18 13:15 ` Michael S. Tsirkin 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-18 3:28 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Thu, 2011-03-17 at 08:18 -0700, Shirley Ma wrote: > On Thu, 2011-03-17 at 07:02 +0200, Michael S. Tsirkin wrote: > > So, this just tries to make sure there's enough space for > > max packet in the ring, if not - drop and return OK. > > Why bother checking beforehand though? > > If that's what we want to do, we can just call add_buf and see > > if it fails? > > In add_buf, there is an additional kick, see below. I added check > capacity to avoid this, thought it would be better performance. I will > retest it w/i add_buf to see the performance difference. > > if (vq->num_free < out + in) { > pr_debug("Can't add buf len %i - avail = %i\n", > out + in, vq->num_free); > /* FIXME: for historical reasons, we force a notify > here > if > * there are outgoing parts to the buffer. Presumably > the > * host should service the ring ASAP. */ > if (out) > vq->notify(&vq->vq); > END_USE(vq); > return -ENOSPC; > } > More test results: UDP_STREAM test results (% is guest vcpu, guest has 2 vpus): Send(netperf) ---- size 2.6.38-rc8 2.6.38-rc8+ 2.6.38-rc8 addbuf failure check capacity ----------------------------------------------------- 1K 1541.0/50.14% 2169.1/50.03% 3018.9/50% 2K 1649.7/33.74% 3362.6/50.18% 4518.8/50.47% 4K 2957.8/44.83% 5965.9/50.03% 9592.5/50% 8K 3788/39.01% 9852.8/50.25% 15483.8/50% 16K 4736.1/34.13% 14946.5/50.01% 21645.0/50% Recv(netserver w/i recv errors) ---- 1K 1541/8.36% 1554.4/9.67% 1675.1/11.26% 2K 1649.7/33.4% 1945.5/5.59% 2160.6/5.99% 4K 2556.3/5.07% 3044.8/7.12% 3118.9/7.86% 8K 3775/39.01% 4361/9/9.14% 4017.1/7.89% 16K 4466.4/8.56% 4435.2/10.95% 4446.8/9.92% TCP_STREAM test results (% is guest vcpu, guest has two vcpus): size 2.6.38-rc8 2.6.38-rc8+ 2.6.38-rc8 addbuf failure check capacity ------------------------------------------------------ 1K 2381.10/42.49% 5686.01/50.08% 5599.15/50.42% 2K 3205.08/49.18% 8675.93/48.59% 9241.86/48.42% 4K 5231.24/34.12% 9309.67/42.07% 9321.87/40.94% 8K 7952.74/35.85% 9001.95/38.26% 9265.45/37.63% 16K 8260.68/35.07% 7911.52/34.35% 8310.29/34.28% 64K 9103.75/28.98% 9219.12/31.52% 9094.38/29.44% qemu-kvm host cpu utilization also saved for both addbuf failure and check capacity patches, vhost cpu utilization are similar in all case. Looks like the additional guest notify in add_buf doesn't cost that much than I thought to be. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-18 3:28 ` Shirley Ma @ 2011-03-18 13:15 ` Michael S. Tsirkin 2011-03-18 16:54 ` Shirley Ma 0 siblings, 1 reply; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-18 13:15 UTC (permalink / raw) To: Shirley Ma; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Thu, Mar 17, 2011 at 08:28:47PM -0700, Shirley Ma wrote: > On Thu, 2011-03-17 at 08:18 -0700, Shirley Ma wrote: > > On Thu, 2011-03-17 at 07:02 +0200, Michael S. Tsirkin wrote: > > > So, this just tries to make sure there's enough space for > > > max packet in the ring, if not - drop and return OK. > > > Why bother checking beforehand though? > > > If that's what we want to do, we can just call add_buf and see > > > if it fails? > > > > In add_buf, there is an additional kick, see below. I added check > > capacity to avoid this, thought it would be better performance. I will > > retest it w/i add_buf to see the performance difference. > > > > if (vq->num_free < out + in) { > > pr_debug("Can't add buf len %i - avail = %i\n", > > out + in, vq->num_free); > > /* FIXME: for historical reasons, we force a notify > > here > > if > > * there are outgoing parts to the buffer. Presumably > > the > > * host should service the ring ASAP. */ > > if (out) > > vq->notify(&vq->vq); > > END_USE(vq); > > return -ENOSPC; > > } > > Rusty, could you pls clarify what are the historical reasons here? Are they still valid? If yes we could dedicate a feature flag to disabling this, or guess that the host is new by looking at some other feature flag. > More test results: > > UDP_STREAM test results (% is guest vcpu, guest has 2 vpus): > > Send(netperf) > ---- > > size 2.6.38-rc8 2.6.38-rc8+ 2.6.38-rc8 > addbuf failure check capacity > ----------------------------------------------------- > 1K 1541.0/50.14% 2169.1/50.03% 3018.9/50% > 2K 1649.7/33.74% 3362.6/50.18% 4518.8/50.47% > 4K 2957.8/44.83% 5965.9/50.03% 9592.5/50% > 8K 3788/39.01% 9852.8/50.25% 15483.8/50% > 16K 4736.1/34.13% 14946.5/50.01% 21645.0/50% Is this the local or remote throughput? With UDP_STREAM you are mostly interested in remote throughput, local one can be pretty high while most packets get dropped. > Looks like the additional guest notify in add_buf doesn't cost that much > than I thought to be. > > Thanks > Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-18 13:15 ` Michael S. Tsirkin @ 2011-03-18 16:54 ` Shirley Ma 0 siblings, 0 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-18 16:54 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Fri, 2011-03-18 at 15:15 +0200, Michael S. Tsirkin wrote: > Is this the local or remote throughput? > With UDP_STREAM you are mostly interested in > remote throughput, local one can be pretty high > while most packets get dropped. This is the local throughput; the remote side is the recv (netserver) data above. With the default wmem/rmem there are many recv errors, and I haven't tuned those results yet. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 0:12 [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop Shirley Ma 2011-03-17 5:02 ` Michael S. Tsirkin @ 2011-03-17 5:10 ` Rusty Russell 2011-03-17 15:10 ` Shirley Ma 2011-03-18 13:33 ` Herbert Xu 2 siblings, 1 reply; 24+ messages in thread From: Rusty Russell @ 2011-03-17 5:10 UTC (permalink / raw) To: Shirley Ma, Michael S. Tsirkin; +Cc: David Miller, kvm, netdev On Wed, 16 Mar 2011 17:12:55 -0700, Shirley Ma <mashirle@us.ibm.com> wrote: > Signed-off-by: Shirley Ma <xma@us.ibm.com> This is fascinating... and deeply weird. OK, what's the difference between calling xmit_skb and ignoring failure, and this patch which figures out it's going to fail before calling xmit_skb? ie. what if you *just* delete this: > @@ -605,20 +620,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > skb_orphan(skb); > nf_reset(skb); > > - /* Apparently nice girls don't return TX_BUSY; stop the queue > - * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!virtqueue_enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. */ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - virtqueue_disable_cb(vi->svq); > - } > - } > - } > - > return NETDEV_TX_OK; > } Thanks! Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 5:10 ` Rusty Russell @ 2011-03-17 15:10 ` Shirley Ma 0 siblings, 0 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-17 15:10 UTC (permalink / raw) To: Rusty Russell; +Cc: Michael S. Tsirkin, David Miller, kvm, netdev On Thu, 2011-03-17 at 15:40 +1030, Rusty Russell wrote: > This is fascinating... and deeply weird. > > OK, what's the difference between calling xmit_skb and ignoring > failure, > and this patch which figures out it's going to fail before calling > xmit_skb? > > ie. what if you *just* delete this: Somehow I had it in my mind that add_buf is more expensive than simply checking the ring capacity. I will retest with your and Michael's suggestion here. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 0:12 [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop Shirley Ma 2011-03-17 5:02 ` Michael S. Tsirkin 2011-03-17 5:10 ` Rusty Russell @ 2011-03-18 13:33 ` Herbert Xu 2011-03-19 1:41 ` Shirley Ma 2 siblings, 1 reply; 24+ messages in thread From: Herbert Xu @ 2011-03-18 13:33 UTC (permalink / raw) To: Shirley Ma; +Cc: mst, rusty, davem, kvm, netdev Shirley Ma <mashirle@us.ibm.com> wrote: > > + /* Drop packet instead of stop queue for better performance */ I would like to see some justification as to why this is the right way to go and not just papering over the real problem. Thanks, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-18 13:33 ` Herbert Xu @ 2011-03-19 1:41 ` Shirley Ma 2011-03-21 18:03 ` Shirley Ma 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-19 1:41 UTC (permalink / raw) To: Herbert Xu; +Cc: mst, rusty, davem, kvm, netdev On Fri, 2011-03-18 at 08:33 -0500, Herbert Xu wrote: > Shirley Ma <mashirle@us.ibm.com> wrote: > > > > + /* Drop packet instead of stop queue for better performance > */ > > I would like to see some justification as to why this is the right > way to go and not just papering over the real problem. Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, which involves: 1. Guest enable callback: one memory barrier, interrupt flag set 2. Host signals guest: one memory barrier, and a TX interrupt from host to KVM guest through evenfd_signal. Most of the workload so far we barely see TX over run, except for small messages TCP_STREAM. For small message size TCP_STREAM workload, no matter how big the TX queue size is, it always causes overrun. I even re-enable the TX queue when it's empty, it still hits TX overrun again and again. Somehow KVM guest and host is not in pace on processing small packets. I tried to pin each thread to different CPU, it didn't help. So it didn't seem to be scheduling related. >From the performance results, we can see dramatically performance gain with this patch. I would like to dig out the real reason why host can't in pace with guest, but haven't figured it out in month, that's the reason I held this patch for a while. However if anyone can give me any ideas on how to debug the real problem, I am willing to try it out. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-19 1:41 ` Shirley Ma @ 2011-03-21 18:03 ` Shirley Ma 2011-03-22 11:36 ` Michael S. Tsirkin 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-21 18:03 UTC (permalink / raw) To: Herbert Xu; +Cc: mst, rusty, davem, kvm, netdev On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > + /* Drop packet instead of stop queue for better > performance > > */ > > > > I would like to see some justification as to why this is the right > > way to go and not just papering over the real problem. > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > which involves: > > 1. Guest enable callback: one memory barrier, interrupt flag set Missed this cost: for history reason, it also involves a guest exit from I/O write (PCI_QUEUE_NOTIFY). > 2. Host signals guest: one memory barrier, and a TX interrupt from > host > to KVM guest through evenfd_signal. > > > Most of the workload so far we barely see TX over run, except for > small > messages TCP_STREAM. > > For small message size TCP_STREAM workload, no matter how big the TX > queue size is, it always causes overrun. I even re-enable the TX queue > when it's empty, it still hits TX overrun again and again. > > Somehow KVM guest and host is not in pace on processing small packets. > I > tried to pin each thread to different CPU, it didn't help. So it > didn't > seem to be scheduling related. > > >From the performance results, we can see dramatically performance > gain > with this patch. > > I would like to dig out the real reason why host can't in pace with > guest, but haven't figured it out in month, that's the reason I held > this patch for a while. However if anyone can give me any ideas on how > to debug the real problem, I am willing to try it out. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-21 18:03 ` Shirley Ma @ 2011-03-22 11:36 ` Michael S. Tsirkin 2011-03-23 2:26 ` Shirley Ma 2011-03-24 0:16 ` Rusty Russell 0 siblings, 2 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-22 11:36 UTC (permalink / raw) To: Shirley Ma; +Cc: Herbert Xu, rusty, davem, kvm, netdev On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote: > On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > > + /* Drop packet instead of stop queue for better > > performance > > > */ > > > > > > I would like to see some justification as to why this is the right > > > way to go and not just papering over the real problem. > > > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > > which involves: > > > > 1. Guest enable callback: one memory barrier, interrupt flag set > > Missed this cost: for history reason, it also involves a guest exit from > I/O write (PCI_QUEUE_NOTIFY). OK, after some research, it looks like the reason was the tx timer that qemu used to use. So the hack of avoiding the add_buf call will avoid this kick and so break these hosts. I guess we can add a feature bit to detect a new host and so avoid the kick. We are running low on feature bits unfortunately, but just fo testing, could you quantify the difference that this makes using the following patch: diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc2f73e..6106017 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -185,11 +185,6 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq, if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); - /* FIXME: for historical reasons, we force a notify here if - * there are outgoing parts to the buffer. Presumably the - * host should service the ring ASAP. */ - if (out) - vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } -- MST ^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-22 11:36 ` Michael S. Tsirkin @ 2011-03-23 2:26 ` Shirley Ma 2011-03-24 0:30 ` Rusty Russell 2011-03-24 0:16 ` Rusty Russell 1 sibling, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-23 2:26 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Herbert Xu, rusty, davem, kvm, netdev On Tue, 2011-03-22 at 13:36 +0200, Michael S. Tsirkin wrote: > diff --git a/drivers/virtio/virtio_ring.c > b/drivers/virtio/virtio_ring.c > index cc2f73e..6106017 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -185,11 +185,6 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq, > if (vq->num_free < out + in) { > pr_debug("Can't add buf len %i - avail = %i\n", > out + in, vq->num_free); > - /* FIXME: for historical reasons, we force a notify > here if > - * there are outgoing parts to the buffer. Presumably > the > - * host should service the ring ASAP. */ > - if (out) > - vq->notify(&vq->vq); > END_USE(vq); > return -ENOSPC; > } > With simply removing the notify here, it does help the case when TX overrun hits too often, for example for 1K message size, the single TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-23 2:26 ` Shirley Ma @ 2011-03-24 0:30 ` Rusty Russell 2011-03-24 4:14 ` Shirley Ma 2011-03-24 14:28 ` Michael S. Tsirkin 0 siblings, 2 replies; 24+ messages in thread From: Rusty Russell @ 2011-03-24 0:30 UTC (permalink / raw) To: Shirley Ma, Michael S. Tsirkin; +Cc: Herbert Xu, davem, kvm, netdev > With simply removing the notify here, it does help the case when TX > overrun hits too often, for example for 1K message size, the single > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. OK, we'll be getting rid of the "kick on full", so please delete that on all benchmarks. Now, does the capacity check before add_buf() still win anything? I can't see how unless we have some weird bug. Once we've sorted that out, we should look at the more radical change of publishing last_used and using that to intuit whether interrupts should be sent. If we're not careful with ordering and barriers that could introduce more bugs. Anything else on the optimization agenda I've missed? Thanks, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
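As a point of reference, the "publish last_used" idea Rusty mentions can be sketched as follows: the consumer publishes the last index it has processed in a field the producer can read, and the producer only signals when its new index has moved past that value. The helper name below is a placeholder, not an agreed API; the only real constraint is that the indices are free-running u16 values, so the comparison must be wrap-safe:

/* Sketch: signal only if the peer's published index lies in the window
 * (old_idx, new_idx], i.e. the peer has not yet seen the new entries. */
static bool should_signal(u16 published_idx, u16 new_idx, u16 old_idx)
{
	return (u16)(new_idx - published_idx - 1) < (u16)(new_idx - old_idx);
}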
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 0:30 ` Rusty Russell @ 2011-03-24 4:14 ` Shirley Ma 2011-03-24 14:28 ` Michael S. Tsirkin 1 sibling, 0 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-24 4:14 UTC (permalink / raw) To: Rusty Russell; +Cc: Michael S. Tsirkin, Herbert Xu, davem, kvm, netdev On Thu, 2011-03-24 at 11:00 +1030, Rusty Russell wrote: > > With simply removing the notify here, it does help the case when TX > > overrun hits too often, for example for 1K message size, the single > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > OK, we'll be getting rid of the "kick on full", so please delete that > on > all benchmarks. > > Now, does the capacity check before add_buf() still win anything? I > can't see how unless we have some weird bug. > > Once we've sorted that out, we should look at the more radical change > of publishing last_used and using that to intuit whether interrupts > should be sent. If we're not careful with ordering and barriers that > could introduce more bugs. Without the kick, it's not necessary for capacity check. I am regenerating the patch with add_buf check and summit the patch after passing all tests. > Anything else on the optimization agenda I've missed? Tom found small performance gain with freeing the used buffers when half of the ring is full in TCP_RR workload. I think we need a new API in virtio, which frees all used buffers at once, I am testing the performance now, the new API looks like: drivers/virtio/virtio_ring.c | 40 +++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 6 +++++ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc2f73e..6d2dc16 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -329,6 +329,46 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) } EXPORT_SYMBOL_GPL(virtqueue_get_buf); +int virtqueue_free_used(struct virtqueue *_vq, void (*free)(void *)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + if (unlikely(vq->broken)) { + END_USE(vq); + return NULL; + } + + /* Only get used array entries after they have been exposed by host. */ + virtio_rmb(); + + while (vq->last_used_idx != vq->vring.used->idx) { + i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; + + if (unlikely(i >= vq->vring.num)) { + BAD_RING(vq, "id %u out of range\n", i); + return NULL; + } + if (unlikely(!vq->data[i])) { + BAD_RING(vq, "id %u is not a head!\n", i); + return NULL; + } + + /* detach_buf clears data, so grab it now. */ + buf = vq->data[i]; + detach_buf(vq, i); + free(buf); + vq->last_used_idx++; + } + END_USE(vq); + return vq->num_free; +} + +EXPORT_SYMBOL_GPL(virtqueue_free_used); + void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f..19acc66 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -42,6 +42,10 @@ struct virtqueue { * vq: the struct virtqueue we're talking about. * len: the length written into the buffer * Returns NULL or the "data" token handed to add_buf. + * virtqueue_free_used: free all used buffers in the queue + * vq: the struct virtqueue we're talking about. + * free: free buf function from caller. + * Returns remaining capacity of the queue. * virtqueue_disable_cb: disable callbacks * vq: the struct virtqueue we're talking about. 
* Note that this is not necessarily synchronous, hence unreliable and only @@ -82,6 +86,8 @@ void virtqueue_kick(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); +int virtqueue_free_used(struct virtqueue *vq, void (*free)(void *buf)); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); Thanks Shirley ^ permalink raw reply related [flat|nested] 24+ messages in thread
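If an API along these lines were merged, the virtio_net TX path might use it roughly as below. free_xmit_skb() is a hypothetical callback invented for illustration, and per-device stats handling is left out since the proposed interface only passes the buffer; this is a sketch of the usage, not part of the posted patch:

/* Hypothetical callback handed to the proposed virtqueue_free_used(). */
static void free_xmit_skb(void *buf)
{
	struct sk_buff *skb = buf;

	pr_debug("Sent skb %p\n", skb);
	dev_kfree_skb_any(skb);
}

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int capacity;

	/* One pass over the used ring frees everything and reports the
	 * remaining capacity, replacing the free_old_xmit_skbs() loop. */
	capacity = virtqueue_free_used(vi->svq, free_xmit_skb);
	if (capacity < MAX_SKB_FRAGS + 2) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	xmit_skb(vi, skb);
	virtqueue_kick(vi->svq);
	skb_orphan(skb);
	nf_reset(skb);
	return NETDEV_TX_OK;
}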
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 0:30 ` Rusty Russell 2011-03-24 4:14 ` Shirley Ma @ 2011-03-24 14:28 ` Michael S. Tsirkin 2011-03-24 17:46 ` Shirley Ma 2011-03-25 4:50 ` Rusty Russell 1 sibling, 2 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-24 14:28 UTC (permalink / raw) To: Rusty Russell; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > With simply removing the notify here, it does help the case when TX > > overrun hits too often, for example for 1K message size, the single > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > OK, we'll be getting rid of the "kick on full", so please delete that on > all benchmarks. > > Now, does the capacity check before add_buf() still win anything? I > can't see how unless we have some weird bug. > > Once we've sorted that out, we should look at the more radical change > of publishing last_used and using that to intuit whether interrupts > should be sent. If we're not careful with ordering and barriers that > could introduce more bugs. Right. I am working on this, and trying to be careful. One thing I'm in doubt about: sometimes we just want to disable interrupts. Should still use flags in that case? I thought that if we make the published index 0 to vq->num - 1, then a special value in the index field could disable interrupts completely. We could even reuse the space for the flags field to stick the index in. Too complex? > Anything else on the optimization agenda I've missed? > > Thanks, > Rusty. Several other things I am looking at, wellcome cooperation: 1. It's probably a good idea to update avail index immediately instead of upon kick: for RX this might help parallelism with the host. 2. Adding an API to add a single buffer instead of s/g, seems to help a bit. 3. For TX sometimes we free a single buffer, sometimes a ton of them, which might make the transmit latency vary. It's probably a good idea to limit this, maybe free the minimal number possible to keep the device going without stops, maybe free up to MAX_SKB_FRAGS. 4. If the ring is full, we now notify right after the first entry is consumed. For TX this is suboptimal, we should try delaying the interrupt on host. More ideas, would be nice if someone can try them out: 1. We are allocating/freeing buffers for indirect descriptors. Use some kind of pool instead? And we could preformat part of the descriptor. 2. I didn't have time to work on virtio2 ideas presented at the kvm forum yet, any takers? -- MST ^ permalink raw reply [flat|nested] 24+ messages in thread
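Item 3 on the list above (bounding how many completed buffers are freed per transmit) could look something like the sketch below; the bound of 2 + MAX_SKB_FRAGS is one plausible choice, enough room for a worst-case packet, not a measured value:

/* Sketch: free at most 'limit' completed TX skbs per call so the cost
 * charged to each transmit stays roughly constant. */
static void free_old_xmit_skbs_limited(struct virtnet_info *vi,
				       unsigned int limit)
{
	struct sk_buff *skb;
	unsigned int len;

	while (limit-- && (skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);
		vi->dev->stats.tx_bytes += skb->len;
		vi->dev->stats.tx_packets++;
		dev_kfree_skb_any(skb);
	}
}

/* In start_xmit(): free just enough to make room for a worst-case packet. */
free_old_xmit_skbs_limited(vi, 2 + MAX_SKB_FRAGS);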
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 14:28 ` Michael S. Tsirkin @ 2011-03-24 17:46 ` Shirley Ma 2011-03-24 18:10 ` Michael S. Tsirkin 2011-03-25 4:51 ` Rusty Russell 2011-03-25 4:50 ` Rusty Russell 1 sibling, 2 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-24 17:46 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, Herbert Xu, davem, kvm, netdev On Thu, 2011-03-24 at 16:28 +0200, Michael S. Tsirkin wrote: > On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > > With simply removing the notify here, it does help the case when TX > > > overrun hits too often, for example for 1K message size, the single > > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > > > OK, we'll be getting rid of the "kick on full", so please delete that on > > all benchmarks. > > > > Now, does the capacity check before add_buf() still win anything? I > > can't see how unless we have some weird bug. > > > > Once we've sorted that out, we should look at the more radical change > > of publishing last_used and using that to intuit whether interrupts > > should be sent. If we're not careful with ordering and barriers that > > could introduce more bugs. > > Right. I am working on this, and trying to be careful. > One thing I'm in doubt about: sometimes we just want to > disable interrupts. Should still use flags in that case? > I thought that if we make the published index 0 to vq->num - 1, > then a special value in the index field could disable > interrupts completely. We could even reuse the space > for the flags field to stick the index in. Too complex? > > Anything else on the optimization agenda I've missed? > > > > Thanks, > > Rusty. > > Several other things I am looking at, wellcome cooperation: > 1. It's probably a good idea to update avail index > immediately instead of upon kick: for RX > this might help parallelism with the host. Is that possible to use the same idea for publishing last used idx to publish avail idx? Then we can save guest iowrite/exits. > 2. Adding an API to add a single buffer instead of s/g, > seems to help a bit. > > 3. For TX sometimes we free a single buffer, sometimes > a ton of them, which might make the transmit latency > vary. It's probably a good idea to limit this, > maybe free the minimal number possible to keep the device > going without stops, maybe free up to MAX_SKB_FRAGS. I am playing with it now, to collect more perf data to see what's the best value to free number of used buffers. > 4. If the ring is full, we now notify right after > the first entry is consumed. For TX this is suboptimal, > we should try delaying the interrupt on host. > More ideas, would be nice if someone can try them out: > 1. We are allocating/freeing buffers for indirect descriptors. > Use some kind of pool instead? > And we could preformat part of the descriptor. > 2. I didn't have time to work on virtio2 ideas presented > at the kvm forum yet, any takers? If I have time, I will look at this. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 17:46 ` Shirley Ma @ 2011-03-24 18:10 ` Michael S. Tsirkin 2011-03-25 4:51 ` Rusty Russell 1 sibling, 0 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-24 18:10 UTC (permalink / raw) To: Shirley Ma; +Cc: Rusty Russell, Herbert Xu, davem, kvm, netdev On Thu, Mar 24, 2011 at 10:46:49AM -0700, Shirley Ma wrote: > On Thu, 2011-03-24 at 16:28 +0200, Michael S. Tsirkin wrote: > > On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > > > With simply removing the notify here, it does help the case when TX > > > > overrun hits too often, for example for 1K message size, the single > > > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > > > > > OK, we'll be getting rid of the "kick on full", so please delete that on > > > all benchmarks. > > > > > > Now, does the capacity check before add_buf() still win anything? I > > > can't see how unless we have some weird bug. > > > > > > Once we've sorted that out, we should look at the more radical change > > > of publishing last_used and using that to intuit whether interrupts > > > should be sent. If we're not careful with ordering and barriers that > > > could introduce more bugs. > > > > Right. I am working on this, and trying to be careful. > > One thing I'm in doubt about: sometimes we just want to > > disable interrupts. Should still use flags in that case? > > I thought that if we make the published index 0 to vq->num - 1, > > then a special value in the index field could disable > > interrupts completely. We could even reuse the space > > for the flags field to stick the index in. Too complex? > > > Anything else on the optimization agenda I've missed? > > > > > > Thanks, > > > Rusty. > > > > Several other things I am looking at, wellcome cooperation: > > 1. It's probably a good idea to update avail index > > immediately instead of upon kick: for RX > > this might help parallelism with the host. > Is that possible to use the same idea for publishing last used idx to > publish avail idx? Then we can save guest iowrite/exits. Yes, but unrelated to 1 above :) > > 2. Adding an API to add a single buffer instead of s/g, > > seems to help a bit. > > > > 3. For TX sometimes we free a single buffer, sometimes > > a ton of them, which might make the transmit latency > > vary. It's probably a good idea to limit this, > > maybe free the minimal number possible to keep the device > > going without stops, maybe free up to MAX_SKB_FRAGS. > > I am playing with it now, to collect more perf data to see what's the > best value to free number of used buffers. The best IMO is to keep the number of freed buffers constant so that we have more or less identical overhead for each packet. > > 4. If the ring is full, we now notify right after > > the first entry is consumed. For TX this is suboptimal, > > we should try delaying the interrupt on host. > > > More ideas, would be nice if someone can try them out: > > 1. We are allocating/freeing buffers for indirect descriptors. > > Use some kind of pool instead? > > And we could preformat part of the descriptor. > > 2. I didn't have time to work on virtio2 ideas presented > > at the kvm forum yet, any takers? > If I have time, I will look at this. > > Thanks > Shirley > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 17:46 ` Shirley Ma 2011-03-24 18:10 ` Michael S. Tsirkin @ 2011-03-25 4:51 ` Rusty Russell 1 sibling, 0 replies; 24+ messages in thread From: Rusty Russell @ 2011-03-25 4:51 UTC (permalink / raw) To: Shirley Ma, Michael S. Tsirkin; +Cc: Herbert Xu, davem, kvm, netdev On Thu, 24 Mar 2011 10:46:49 -0700, Shirley Ma <mashirle@us.ibm.com> wrote: > On Thu, 2011-03-24 at 16:28 +0200, Michael S. Tsirkin wrote: > > Several other things I am looking at, wellcome cooperation: > > 1. It's probably a good idea to update avail index > > immediately instead of upon kick: for RX > > this might help parallelism with the host. > Is that possible to use the same idea for publishing last used idx to > publish avail idx? Then we can save guest iowrite/exits. Yes, it should be symmetrical. Test independently of course, but the same logic applies. Thanks! Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 14:28 ` Michael S. Tsirkin 2011-03-24 17:46 ` Shirley Ma @ 2011-03-25 4:50 ` Rusty Russell 2011-03-27 7:52 ` Michael S. Tsirkin 1 sibling, 1 reply; 24+ messages in thread From: Rusty Russell @ 2011-03-25 4:50 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Thu, 24 Mar 2011 16:28:22 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > > With simply removing the notify here, it does help the case when TX > > > overrun hits too often, for example for 1K message size, the single > > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > > > OK, we'll be getting rid of the "kick on full", so please delete that on > > all benchmarks. > > > > Now, does the capacity check before add_buf() still win anything? I > > can't see how unless we have some weird bug. > > > > Once we've sorted that out, we should look at the more radical change > > of publishing last_used and using that to intuit whether interrupts > > should be sent. If we're not careful with ordering and barriers that > > could introduce more bugs. > > Right. I am working on this, and trying to be careful. > One thing I'm in doubt about: sometimes we just want to > disable interrupts. Should still use flags in that case? > I thought that if we make the published index 0 to vq->num - 1, > then a special value in the index field could disable > interrupts completely. We could even reuse the space > for the flags field to stick the index in. Too complex? Making the index free-running avoids the "full or empty" confusion, plus offers and extra debugging insight. I think that if they really want to disable interrrupts, the flag should still work, and when the client accepts the "publish last_idx" feature they are accepting that interrupts may be omitted if they haven't updated last_idx yet. > > Anything else on the optimization agenda I've missed? > > > > Thanks, > > Rusty. > > Several other things I am looking at, wellcome cooperation: > 1. It's probably a good idea to update avail index > immediately instead of upon kick: for RX > this might help parallelism with the host. Yes, once we've done everything else, we should measure this. It makes sense. > 2. Adding an API to add a single buffer instead of s/g, > seems to help a bit. This goes last, since it's kind of an ugly hack, but all internal to Linux if we decide it's a win. > 3. For TX sometimes we free a single buffer, sometimes > a ton of them, which might make the transmit latency > vary. It's probably a good idea to limit this, > maybe free the minimal number possible to keep the device > going without stops, maybe free up to MAX_SKB_FRAGS. This kind of heuristic is going to be quite variable depending on circumstance, I think, so it's a lot of work to make sure we get it right. > 4. If the ring is full, we now notify right after > the first entry is consumed. For TX this is suboptimal, > we should try delaying the interrupt on host. Lguest already does that: only sends an interrupt when it's run out of things to do. It does update the used ring, however, as it processes them. This seems sensible to me, but needs to be measured separately as well. > More ideas, would be nice if someone can try them out: > 1. We are allocating/freeing buffers for indirect descriptors. > Use some kind of pool instead? 
> And we could preformat part of the descriptor. We need some poolish mechanism for virtio_blk too; perhaps an allocation callback which both can use (virtio_blk to alloc from a pool, virtio_net to recycle?). Along similar lines to preformatting, we could actually try to prepend the skb_vnet_hdr to the vnet data, and use a single descriptor for the hdr and the first part of the packet. Though IIRC, qemu's virtio barfs if the first descriptor isn't just the hdr (barf...). > 2. I didn't have time to work on virtio2 ideas presented > at the kvm forum yet, any takers? I didn't even attend. But I think that virtio is moribund for the moment; there wasn't enough demand and it's clear that there are optimization unexplored in virtio1. Cheers, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
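The free-running index point Rusty makes is easiest to see with the arithmetic written out: if both indices increment without being reduced modulo the ring size, the number of entries in flight is simply their difference, so a full ring (difference == num) is distinguishable from an empty one (difference == 0). A minimal sketch, using the u16 index width of the vring layout:

/* Free-running u16 indices: wrap-safe subtraction gives entries in flight;
 * only the array lookup takes the index modulo the ring size. */
static inline u16 ring_in_flight(u16 avail_idx, u16 used_idx)
{
	return (u16)(avail_idx - used_idx);
}

static inline unsigned int ring_slot(u16 idx, unsigned int num)
{
	return idx % num;	/* map the free-running index onto a slot */
}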
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-25 4:50 ` Rusty Russell @ 2011-03-27 7:52 ` Michael S. Tsirkin 2011-04-04 6:13 ` Rusty Russell 0 siblings, 1 reply; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-27 7:52 UTC (permalink / raw) To: Rusty Russell; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Fri, Mar 25, 2011 at 03:20:46PM +1030, Rusty Russell wrote: > > 3. For TX sometimes we free a single buffer, sometimes > > a ton of them, which might make the transmit latency > > vary. It's probably a good idea to limit this, > > maybe free the minimal number possible to keep the device > > going without stops, maybe free up to MAX_SKB_FRAGS. > > This kind of heuristic is going to be quite variable depending on > circumstance, I think, so it's a lot of work to make sure we get it > right. Hmm, trying to keep the amount of work per descriptor constant seems to make sense though, no? Latency variations are not good for either RT uses or protocols such as TCP. > > 4. If the ring is full, we now notify right after > > the first entry is consumed. For TX this is suboptimal, > > we should try delaying the interrupt on host. > > Lguest already does that: only sends an interrupt when it's run out of > things to do. It does update the used ring, however, as it processes > them. There are many approaches here I suspect something like interrupt after half work is done might be better for parallelism. > > This seems sensible to me, but needs to be measured separately as well. Agree. > > More ideas, would be nice if someone can try them out: > > 1. We are allocating/freeing buffers for indirect descriptors. > > Use some kind of pool instead? > > And we could preformat part of the descriptor. > > We need some poolish mechanism for virtio_blk too; perhaps an allocation > callback which both can use (virtio_blk to alloc from a pool, virtio_net > to recycle?). BTW for recycling, need to be careful about numa effects: probably store cpu id and reallocate if we switch cpus ... (or noma nodes - unfortunately not always described correctly). > Along similar lines to preformatting, we could actually try to prepend > the skb_vnet_hdr to the vnet data, and use a single descriptor for the > hdr and the first part of the packet. > > Though IIRC, qemu's virtio barfs if the first descriptor isn't just the > hdr (barf...). Maybe we can try fixing this before adding more flags, then e.g. publish used flag can be resued to also tell us layout is flexible. Or just add a feature flag for that. > > 2. I didn't have time to work on virtio2 ideas presented > > at the kvm forum yet, any takers? > > I didn't even attend. Hmm, right. But what was presented there was discussed on list as well: a single R/W descriptor ring with valid bit instead of 2 rings + a descriptor array. > But I think that virtio is moribund for the > moment; there wasn't enough demand and it's clear that there are > optimization unexplored in virtio1. I agree absolutely that not all lessons has been learned, playing with different ring layouts would make at least an interesting paper IMO. -- MST ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-27 7:52 ` Michael S. Tsirkin @ 2011-04-04 6:13 ` Rusty Russell 0 siblings, 0 replies; 24+ messages in thread From: Rusty Russell @ 2011-04-04 6:13 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Sun, 27 Mar 2011 09:52:54 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > > Though IIRC, qemu's virtio barfs if the first descriptor isn't just the > > hdr (barf...). > > Maybe we can try fixing this before adding more flags, > then e.g. publish used flag can be resued to also > tell us layout is flexible. Or just add a feature flag for that. We should probably do this at some stage, yes. > > > 2. I didn't have time to work on virtio2 ideas presented > > > at the kvm forum yet, any takers? > > > > I didn't even attend. > > Hmm, right. But what was presented there was discussed on list as well: > a single R/W descriptor ring with valid bit instead of 2 rings > + a descriptor array. I'll be happy when we reach the point that the extra cacheline is hurting us :) Then we should do direct descriptors w/ a cookie as the value to hand back when finished. That seems to be close to optimal. > I agree absolutely that not all lessons has been learned, > playing with different ring layouts would make at least > an interesting paper IMO. Yes, I'd like to see the results... Thanks, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-22 11:36 ` Michael S. Tsirkin 2011-03-23 2:26 ` Shirley Ma @ 2011-03-24 0:16 ` Rusty Russell 2011-03-24 6:39 ` Michael S. Tsirkin 1 sibling, 1 reply; 24+ messages in thread From: Rusty Russell @ 2011-03-24 0:16 UTC (permalink / raw) To: Michael S. Tsirkin, Shirley Ma; +Cc: Herbert Xu, davem, kvm, netdev On Tue, 22 Mar 2011 13:36:50 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote: > > On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > > > + /* Drop packet instead of stop queue for better > > > performance > > > > */ > > > > > > > > I would like to see some justification as to why this is the right > > > > way to go and not just papering over the real problem. > > > > > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > > > which involves: > > > > > > 1. Guest enable callback: one memory barrier, interrupt flag set > > > > Missed this cost: for history reason, it also involves a guest exit from > > I/O write (PCI_QUEUE_NOTIFY). > > OK, after some research, it looks like the reason was the tx timer that > qemu used to use. So the hack of avoiding the add_buf call will > avoid this kick and so break these hosts. > I guess we can add a feature bit to detect a new host > and so avoid the kick. We are running low on feature bits > unfortunately, but just fo testing, could you quantify the difference > that this makes using the following patch: Performance would suffer for those ancient qemus if we didn't do this, but it wouldn't be fatal to them. I think we should just remove it; the standard certainly doesn't mention it. Cheers, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 0:16 ` Rusty Russell @ 2011-03-24 6:39 ` Michael S. Tsirkin 0 siblings, 0 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-24 6:39 UTC (permalink / raw) To: Rusty Russell; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev, Anthony Liguori On Thu, Mar 24, 2011 at 10:46:49AM +1030, Rusty Russell wrote: > On Tue, 22 Mar 2011 13:36:50 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > > On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote: > > > On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > > > > + /* Drop packet instead of stop queue for better > > > > performance > > > > > */ > > > > > > > > > > I would like to see some justification as to why this is the right > > > > > way to go and not just papering over the real problem. > > > > > > > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > > > > which involves: > > > > > > > > 1. Guest enable callback: one memory barrier, interrupt flag set > > > > > > Missed this cost: for history reason, it also involves a guest exit from > > > I/O write (PCI_QUEUE_NOTIFY). > > > > OK, after some research, it looks like the reason was the tx timer that > > qemu used to use. So the hack of avoiding the add_buf call will > > avoid this kick and so break these hosts. > > I guess we can add a feature bit to detect a new host > > and so avoid the kick. We are running low on feature bits > > unfortunately, but just fo testing, could you quantify the difference > > that this makes using the following patch: > > Performance would suffer for those ancient qemus if we didn't do this, > but it wouldn't be fatal to them. > > I think we should just remove it; the standard certainly doesn't mention > it. > > Cheers, > Rusty. I agree here. Anthony, agree? -- MST ^ permalink raw reply [flat|nested] 24+ messages in thread