* [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop @ 2011-03-17 0:12 Shirley Ma 2011-03-17 5:02 ` Michael S. Tsirkin ` (2 more replies) 0 siblings, 3 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-17 0:12 UTC (permalink / raw) To: Michael S. Tsirkin, Rusty Russell; +Cc: David Miller, kvm, netdev Signed-off-by: Shirley Ma <xma@us.ibm.com> --- drivers/net/virtio_net.c | 39 ++++++++++++++++++++------------------- 1 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 82dba5a..c603daa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_net.h> +#include <linux/virtio_ring.h> #include <linux/scatterlist.h> #include <linux/if_vlan.h> #include <linux/slab.h> @@ -509,19 +510,18 @@ again: return received; } -static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) +static void free_old_xmit_skbs(struct virtnet_info *vi) { struct sk_buff *skb; - unsigned int len, tot_sgs = 0; + unsigned int len; while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; - tot_sgs += skb_vnet_hdr(skb)->num_sg; dev_kfree_skb_any(skb); } - return tot_sgs; + return; } static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) @@ -574,11 +574,26 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + bool drop; + bool indirect = virtio_has_feature(vi->vdev, + VIRTIO_RING_F_INDIRECT_DESC); int capacity; /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(vi); - + capacity = virtqueue_get_capacity(vi->svq); + + /* Drop packet instead of stop queue for better performance */ + drop = (capacity == 0 && indirect) || + ((capacity < MAX_SKB_FRAGS + 2) && !indirect); + if (unlikely(drop)) { + dev->stats.tx_fifo_errors++; + dev_warn(&dev->dev, "Unexpected TX queue failure: %d\n", + capacity); + dev->stats.tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; + } /* Try to transmit */ capacity = xmit_skb(vi, skb); @@ -605,20 +620,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) skb_orphan(skb); nf_reset(skb); - /* Apparently nice girls don't return TX_BUSY; stop the queue - * before it gets out of hand. Naturally, this wastes entries. */ - if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { - /* More just got used, free them then recheck. */ - capacity += free_old_xmit_skbs(vi); - if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); - virtqueue_disable_cb(vi->svq); - } - } - } - return NETDEV_TX_OK; } ^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 0:12 [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop Shirley Ma @ 2011-03-17 5:02 ` Michael S. Tsirkin 2011-03-17 15:18 ` Shirley Ma 2011-03-17 5:10 ` Rusty Russell 2011-03-18 13:33 ` Herbert Xu 2 siblings, 1 reply; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-17 5:02 UTC (permalink / raw) To: Shirley Ma; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Wed, Mar 16, 2011 at 05:12:55PM -0700, Shirley Ma wrote: > Signed-off-by: Shirley Ma <xma@us.ibm.com> > --- > drivers/net/virtio_net.c | 39 ++++++++++++++++++++------------------- > 1 files changed, 20 insertions(+), 19 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 82dba5a..c603daa 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -23,6 +23,7 @@ > #include <linux/module.h> > #include <linux/virtio.h> > #include <linux/virtio_net.h> > +#include <linux/virtio_ring.h> > #include <linux/scatterlist.h> > #include <linux/if_vlan.h> > #include <linux/slab.h> > @@ -509,19 +510,18 @@ again: > return received; > } > > -static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) > +static void free_old_xmit_skbs(struct virtnet_info *vi) > { > struct sk_buff *skb; > - unsigned int len, tot_sgs = 0; > + unsigned int len; > > while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { > pr_debug("Sent skb %p\n", skb); > vi->dev->stats.tx_bytes += skb->len; > vi->dev->stats.tx_packets++; > - tot_sgs += skb_vnet_hdr(skb)->num_sg; > dev_kfree_skb_any(skb); > } > - return tot_sgs; > + return; > } > > static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) > @@ -574,11 +574,26 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) > static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > { > struct virtnet_info *vi = netdev_priv(dev); > + bool drop; > + bool indirect = virtio_has_feature(vi->vdev, > + VIRTIO_RING_F_INDIRECT_DESC); > int capacity; > > /* Free up any pending old buffers before queueing new ones. */ > free_old_xmit_skbs(vi); > - > + capacity = virtqueue_get_capacity(vi->svq); > + > + /* Drop packet instead of stop queue for better performance */ > + drop = (capacity == 0 && indirect) || > + ((capacity < MAX_SKB_FRAGS + 2) && !indirect); > + if (unlikely(drop)) { > + dev->stats.tx_fifo_errors++; > + dev_warn(&dev->dev, "Unexpected TX queue failure: %d\n", > + capacity); > + dev->stats.tx_dropped++; > + kfree_skb(skb); > + return NETDEV_TX_OK; > + } > /* Try to transmit */ > capacity = xmit_skb(vi, skb); So, this just tries to make sure there's enough space for max packet in the ring, if not - drop and return OK. Why bother checking beforehand though? If that's what we want to do, we can just call add_buf and see if it fails? > @@ -605,20 +620,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > skb_orphan(skb); > nf_reset(skb); > > - /* Apparently nice girls don't return TX_BUSY; stop the queue > - * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!virtqueue_enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. 
*/ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - virtqueue_disable_cb(vi->svq); > - } > - } > - } > - > return NETDEV_TX_OK; > } > > ^ permalink raw reply [flat|nested] 24+ messages in thread
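For illustration, the alternative Michael is suggesting here would look roughly like the sketch below, assuming xmit_skb() simply propagates the virtqueue_add_buf_gfp() return value (negative on a full ring); this is not the posted patch, just one way the drop-on-add_buf-failure path could be written:

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int err;

	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);

	/* Try to transmit; a negative return means the ring was full. */
	err = xmit_skb(vi, skb);
	if (unlikely(err < 0)) {
		/* Drop instead of returning NETDEV_TX_BUSY or stopping the queue. */
		dev->stats.tx_fifo_errors++;
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	virtqueue_kick(vi->svq);

	/* Don't wait up for transmitted skbs to be freed. */
	skb_orphan(skb);
	nf_reset(skb);

	return NETDEV_TX_OK;
}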
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 5:02 ` Michael S. Tsirkin @ 2011-03-17 15:18 ` Shirley Ma 2011-03-18 3:28 ` Shirley Ma 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-17 15:18 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Thu, 2011-03-17 at 07:02 +0200, Michael S. Tsirkin wrote: > So, this just tries to make sure there's enough space for > max packet in the ring, if not - drop and return OK. > Why bother checking beforehand though? > If that's what we want to do, we can just call add_buf and see > if it fails? In add_buf, there is an additional kick, see below. I added check capacity to avoid this, thought it would be better performance. I will retest it w/i add_buf to see the performance difference. if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ if (out) vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 15:18 ` Shirley Ma @ 2011-03-18 3:28 ` Shirley Ma 2011-03-18 13:15 ` Michael S. Tsirkin 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-18 3:28 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Thu, 2011-03-17 at 08:18 -0700, Shirley Ma wrote: > On Thu, 2011-03-17 at 07:02 +0200, Michael S. Tsirkin wrote: > > So, this just tries to make sure there's enough space for > > max packet in the ring, if not - drop and return OK. > > Why bother checking beforehand though? > > If that's what we want to do, we can just call add_buf and see > > if it fails? > > In add_buf, there is an additional kick, see below. I added check > capacity to avoid this, thought it would be better performance. I will > retest it w/i add_buf to see the performance difference. > > if (vq->num_free < out + in) { > pr_debug("Can't add buf len %i - avail = %i\n", > out + in, vq->num_free); > /* FIXME: for historical reasons, we force a notify > here > if > * there are outgoing parts to the buffer. Presumably > the > * host should service the ring ASAP. */ > if (out) > vq->notify(&vq->vq); > END_USE(vq); > return -ENOSPC; > } > More test results: UDP_STREAM test results (% is guest vcpu, guest has 2 vpus): Send(netperf) ---- size 2.6.38-rc8 2.6.38-rc8+ 2.6.38-rc8 addbuf failure check capacity ----------------------------------------------------- 1K 1541.0/50.14% 2169.1/50.03% 3018.9/50% 2K 1649.7/33.74% 3362.6/50.18% 4518.8/50.47% 4K 2957.8/44.83% 5965.9/50.03% 9592.5/50% 8K 3788/39.01% 9852.8/50.25% 15483.8/50% 16K 4736.1/34.13% 14946.5/50.01% 21645.0/50% Recv(netserver w/i recv errors) ---- 1K 1541/8.36% 1554.4/9.67% 1675.1/11.26% 2K 1649.7/33.4% 1945.5/5.59% 2160.6/5.99% 4K 2556.3/5.07% 3044.8/7.12% 3118.9/7.86% 8K 3775/39.01% 4361/9/9.14% 4017.1/7.89% 16K 4466.4/8.56% 4435.2/10.95% 4446.8/9.92% TCP_STREAM test results (% is guest vcpu, guest has two vcpus): size 2.6.38-rc8 2.6.38-rc8+ 2.6.38-rc8 addbuf failure check capacity ------------------------------------------------------ 1K 2381.10/42.49% 5686.01/50.08% 5599.15/50.42% 2K 3205.08/49.18% 8675.93/48.59% 9241.86/48.42% 4K 5231.24/34.12% 9309.67/42.07% 9321.87/40.94% 8K 7952.74/35.85% 9001.95/38.26% 9265.45/37.63% 16K 8260.68/35.07% 7911.52/34.35% 8310.29/34.28% 64K 9103.75/28.98% 9219.12/31.52% 9094.38/29.44% qemu-kvm host cpu utilization also saved for both addbuf failure and check capacity patches, vhost cpu utilization are similar in all case. Looks like the additional guest notify in add_buf doesn't cost that much than I thought to be. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-18 3:28 ` Shirley Ma @ 2011-03-18 13:15 ` Michael S. Tsirkin 2011-03-18 16:54 ` Shirley Ma 0 siblings, 1 reply; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-18 13:15 UTC (permalink / raw) To: Shirley Ma; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Thu, Mar 17, 2011 at 08:28:47PM -0700, Shirley Ma wrote: > On Thu, 2011-03-17 at 08:18 -0700, Shirley Ma wrote: > > On Thu, 2011-03-17 at 07:02 +0200, Michael S. Tsirkin wrote: > > > So, this just tries to make sure there's enough space for > > > max packet in the ring, if not - drop and return OK. > > > Why bother checking beforehand though? > > > If that's what we want to do, we can just call add_buf and see > > > if it fails? > > > > In add_buf, there is an additional kick, see below. I added check > > capacity to avoid this, thought it would be better performance. I will > > retest it w/i add_buf to see the performance difference. > > > > if (vq->num_free < out + in) { > > pr_debug("Can't add buf len %i - avail = %i\n", > > out + in, vq->num_free); > > /* FIXME: for historical reasons, we force a notify > > here > > if > > * there are outgoing parts to the buffer. Presumably > > the > > * host should service the ring ASAP. */ > > if (out) > > vq->notify(&vq->vq); > > END_USE(vq); > > return -ENOSPC; > > } > > Rusty, could you pls clarify what are the historical reasons here? Are they still valid? If yes we could dedicate a feature flag to disabling this, or guess that the host is new by looking at some other feature flag. > More test results: > > UDP_STREAM test results (% is guest vcpu, guest has 2 vpus): > > Send(netperf) > ---- > > size 2.6.38-rc8 2.6.38-rc8+ 2.6.38-rc8 > addbuf failure check capacity > ----------------------------------------------------- > 1K 1541.0/50.14% 2169.1/50.03% 3018.9/50% > 2K 1649.7/33.74% 3362.6/50.18% 4518.8/50.47% > 4K 2957.8/44.83% 5965.9/50.03% 9592.5/50% > 8K 3788/39.01% 9852.8/50.25% 15483.8/50% > 16K 4736.1/34.13% 14946.5/50.01% 21645.0/50% Is this the local or remote throughput? With UDP_STREAM you are mostly interested in remote throughput, local one can be pretty high while most packets get dropped. > Looks like the additional guest notify in add_buf doesn't cost that much > than I thought to be. > > Thanks > Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-18 13:15 ` Michael S. Tsirkin @ 2011-03-18 16:54 ` Shirley Ma 0 siblings, 0 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-18 16:54 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, David Miller, kvm, netdev, Herbert Xu On Fri, 2011-03-18 at 15:15 +0200, Michael S. Tsirkin wrote: > Is this the local or remote throughput? > With UDP_STREAM you are mostly interested in > remote throughput, local one can be pretty high > while most packets get dropped. This is the local throughput; the remote side is the recv (netserver) data above. With the default wmem/rmem there are many recv errors, and I haven't tuned those results yet. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 0:12 [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop Shirley Ma 2011-03-17 5:02 ` Michael S. Tsirkin @ 2011-03-17 5:10 ` Rusty Russell 2011-03-17 15:10 ` Shirley Ma 2011-03-18 13:33 ` Herbert Xu 2 siblings, 1 reply; 24+ messages in thread From: Rusty Russell @ 2011-03-17 5:10 UTC (permalink / raw) To: Shirley Ma, Michael S. Tsirkin; +Cc: David Miller, kvm, netdev On Wed, 16 Mar 2011 17:12:55 -0700, Shirley Ma <mashirle@us.ibm.com> wrote: > Signed-off-by: Shirley Ma <xma@us.ibm.com> This is fascinating... and deeply weird. OK, what's the difference between calling xmit_skb and ignoring failure, and this patch which figures out it's going to fail before calling xmit_skb? ie. what if you *just* delete this: > @@ -605,20 +620,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > skb_orphan(skb); > nf_reset(skb); > > - /* Apparently nice girls don't return TX_BUSY; stop the queue > - * before it gets out of hand. Naturally, this wastes entries. */ > - if (capacity < 2+MAX_SKB_FRAGS) { > - netif_stop_queue(dev); > - if (unlikely(!virtqueue_enable_cb(vi->svq))) { > - /* More just got used, free them then recheck. */ > - capacity += free_old_xmit_skbs(vi); > - if (capacity >= 2+MAX_SKB_FRAGS) { > - netif_start_queue(dev); > - virtqueue_disable_cb(vi->svq); > - } > - } > - } > - > return NETDEV_TX_OK; > } Thanks! Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 5:10 ` Rusty Russell @ 2011-03-17 15:10 ` Shirley Ma 0 siblings, 0 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-17 15:10 UTC (permalink / raw) To: Rusty Russell; +Cc: Michael S. Tsirkin, David Miller, kvm, netdev On Thu, 2011-03-17 at 15:40 +1030, Rusty Russell wrote: > This is fascinating... and deeply weird. > > OK, what's the difference between calling xmit_skb and ignoring > failure, > and this patch which figures out it's going to fail before calling > xmit_skb? > > ie. what if you *just* delete this: Somehow I had it in my mind that add_buf is more expensive than simply checking the ring capacity. I will retest with your and Michael's suggestion here. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-17 0:12 [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop Shirley Ma 2011-03-17 5:02 ` Michael S. Tsirkin 2011-03-17 5:10 ` Rusty Russell @ 2011-03-18 13:33 ` Herbert Xu 2011-03-19 1:41 ` Shirley Ma 2 siblings, 1 reply; 24+ messages in thread From: Herbert Xu @ 2011-03-18 13:33 UTC (permalink / raw) To: Shirley Ma; +Cc: mst, rusty, davem, kvm, netdev Shirley Ma <mashirle@us.ibm.com> wrote: > > + /* Drop packet instead of stop queue for better performance */ I would like to see some justification as to why this is the right way to go and not just papering over the real problem. Thanks, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-18 13:33 ` Herbert Xu @ 2011-03-19 1:41 ` Shirley Ma 2011-03-21 18:03 ` Shirley Ma 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-19 1:41 UTC (permalink / raw) To: Herbert Xu; +Cc: mst, rusty, davem, kvm, netdev On Fri, 2011-03-18 at 08:33 -0500, Herbert Xu wrote: > Shirley Ma <mashirle@us.ibm.com> wrote: > > > > + /* Drop packet instead of stop queue for better performance > */ > > I would like to see some justification as to why this is the right > way to go and not just papering over the real problem. Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, which involves: 1. Guest enable callback: one memory barrier, interrupt flag set 2. Host signals guest: one memory barrier, and a TX interrupt from host to KVM guest through evenfd_signal. Most of the workload so far we barely see TX over run, except for small messages TCP_STREAM. For small message size TCP_STREAM workload, no matter how big the TX queue size is, it always causes overrun. I even re-enable the TX queue when it's empty, it still hits TX overrun again and again. Somehow KVM guest and host is not in pace on processing small packets. I tried to pin each thread to different CPU, it didn't help. So it didn't seem to be scheduling related. >From the performance results, we can see dramatically performance gain with this patch. I would like to dig out the real reason why host can't in pace with guest, but haven't figured it out in month, that's the reason I held this patch for a while. However if anyone can give me any ideas on how to debug the real problem, I am willing to try it out. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-19 1:41 ` Shirley Ma @ 2011-03-21 18:03 ` Shirley Ma 2011-03-22 11:36 ` Michael S. Tsirkin 0 siblings, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-21 18:03 UTC (permalink / raw) To: Herbert Xu; +Cc: mst, rusty, davem, kvm, netdev On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > + /* Drop packet instead of stop queue for better > performance > > */ > > > > I would like to see some justification as to why this is the right > > way to go and not just papering over the real problem. > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > which involves: > > 1. Guest enable callback: one memory barrier, interrupt flag set Missed this cost: for history reason, it also involves a guest exit from I/O write (PCI_QUEUE_NOTIFY). > 2. Host signals guest: one memory barrier, and a TX interrupt from > host > to KVM guest through evenfd_signal. > > > Most of the workload so far we barely see TX over run, except for > small > messages TCP_STREAM. > > For small message size TCP_STREAM workload, no matter how big the TX > queue size is, it always causes overrun. I even re-enable the TX queue > when it's empty, it still hits TX overrun again and again. > > Somehow KVM guest and host is not in pace on processing small packets. > I > tried to pin each thread to different CPU, it didn't help. So it > didn't > seem to be scheduling related. > > >From the performance results, we can see dramatically performance > gain > with this patch. > > I would like to dig out the real reason why host can't in pace with > guest, but haven't figured it out in month, that's the reason I held > this patch for a while. However if anyone can give me any ideas on how > to debug the real problem, I am willing to try it out. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-21 18:03 ` Shirley Ma @ 2011-03-22 11:36 ` Michael S. Tsirkin 2011-03-23 2:26 ` Shirley Ma 2011-03-24 0:16 ` Rusty Russell 0 siblings, 2 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-22 11:36 UTC (permalink / raw) To: Shirley Ma; +Cc: Herbert Xu, rusty, davem, kvm, netdev On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote: > On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > > + /* Drop packet instead of stop queue for better > > performance > > > */ > > > > > > I would like to see some justification as to why this is the right > > > way to go and not just papering over the real problem. > > > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > > which involves: > > > > 1. Guest enable callback: one memory barrier, interrupt flag set > > Missed this cost: for history reason, it also involves a guest exit from > I/O write (PCI_QUEUE_NOTIFY). OK, after some research, it looks like the reason was the tx timer that qemu used to use. So the hack of avoiding the add_buf call will avoid this kick and so break these hosts. I guess we can add a feature bit to detect a new host and so avoid the kick. We are running low on feature bits unfortunately, but just fo testing, could you quantify the difference that this makes using the following patch: diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc2f73e..6106017 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -185,11 +185,6 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq, if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); - /* FIXME: for historical reasons, we force a notify here if - * there are outgoing parts to the buffer. Presumably the - * host should service the ring ASAP. */ - if (out) - vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } -- MST ^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-22 11:36 ` Michael S. Tsirkin @ 2011-03-23 2:26 ` Shirley Ma 2011-03-24 0:30 ` Rusty Russell 2011-03-24 0:16 ` Rusty Russell 1 sibling, 1 reply; 24+ messages in thread From: Shirley Ma @ 2011-03-23 2:26 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Herbert Xu, rusty, davem, kvm, netdev On Tue, 2011-03-22 at 13:36 +0200, Michael S. Tsirkin wrote: > diff --git a/drivers/virtio/virtio_ring.c > b/drivers/virtio/virtio_ring.c > index cc2f73e..6106017 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -185,11 +185,6 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq, > if (vq->num_free < out + in) { > pr_debug("Can't add buf len %i - avail = %i\n", > out + in, vq->num_free); > - /* FIXME: for historical reasons, we force a notify > here if > - * there are outgoing parts to the buffer. Presumably > the > - * host should service the ring ASAP. */ > - if (out) > - vq->notify(&vq->vq); > END_USE(vq); > return -ENOSPC; > } > With simply removing the notify here, it does help the case when TX overrun hits too often, for example for 1K message size, the single TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-23 2:26 ` Shirley Ma @ 2011-03-24 0:30 ` Rusty Russell 2011-03-24 4:14 ` Shirley Ma 2011-03-24 14:28 ` Michael S. Tsirkin 0 siblings, 2 replies; 24+ messages in thread From: Rusty Russell @ 2011-03-24 0:30 UTC (permalink / raw) To: Shirley Ma, Michael S. Tsirkin; +Cc: Herbert Xu, davem, kvm, netdev > With simply removing the notify here, it does help the case when TX > overrun hits too often, for example for 1K message size, the single > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. OK, we'll be getting rid of the "kick on full", so please delete that on all benchmarks. Now, does the capacity check before add_buf() still win anything? I can't see how unless we have some weird bug. Once we've sorted that out, we should look at the more radical change of publishing last_used and using that to intuit whether interrupts should be sent. If we're not careful with ordering and barriers that could introduce more bugs. Anything else on the optimization agenda I've missed? Thanks, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
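As a point of reference, the "publish last_used" idea Rusty mentions can be sketched as follows: the consumer publishes the last index it has processed in a field the producer can read, and the producer only signals when its new index has moved past that value. The helper name below is a placeholder, not an agreed API; the only real constraint is that the indices are free-running u16 values, so the comparison must be wrap-safe:

/* Sketch: signal only if the peer's published index lies in the window
 * (old_idx, new_idx], i.e. the peer has not yet seen the new entries. */
static bool should_signal(u16 published_idx, u16 new_idx, u16 old_idx)
{
	return (u16)(new_idx - published_idx - 1) < (u16)(new_idx - old_idx);
}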
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 0:30 ` Rusty Russell @ 2011-03-24 4:14 ` Shirley Ma 2011-03-24 14:28 ` Michael S. Tsirkin 1 sibling, 0 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-24 4:14 UTC (permalink / raw) To: Rusty Russell; +Cc: Michael S. Tsirkin, Herbert Xu, davem, kvm, netdev On Thu, 2011-03-24 at 11:00 +1030, Rusty Russell wrote: > > With simply removing the notify here, it does help the case when TX > > overrun hits too often, for example for 1K message size, the single > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > OK, we'll be getting rid of the "kick on full", so please delete that > on > all benchmarks. > > Now, does the capacity check before add_buf() still win anything? I > can't see how unless we have some weird bug. > > Once we've sorted that out, we should look at the more radical change > of publishing last_used and using that to intuit whether interrupts > should be sent. If we're not careful with ordering and barriers that > could introduce more bugs. Without the kick, it's not necessary for capacity check. I am regenerating the patch with add_buf check and summit the patch after passing all tests. > Anything else on the optimization agenda I've missed? Tom found small performance gain with freeing the used buffers when half of the ring is full in TCP_RR workload. I think we need a new API in virtio, which frees all used buffers at once, I am testing the performance now, the new API looks like: drivers/virtio/virtio_ring.c | 40 +++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 6 +++++ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc2f73e..6d2dc16 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -329,6 +329,46 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) } EXPORT_SYMBOL_GPL(virtqueue_get_buf); +int virtqueue_free_used(struct virtqueue *_vq, void (*free)(void *)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + if (unlikely(vq->broken)) { + END_USE(vq); + return NULL; + } + + /* Only get used array entries after they have been exposed by host. */ + virtio_rmb(); + + while (vq->last_used_idx != vq->vring.used->idx) { + i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; + + if (unlikely(i >= vq->vring.num)) { + BAD_RING(vq, "id %u out of range\n", i); + return NULL; + } + if (unlikely(!vq->data[i])) { + BAD_RING(vq, "id %u is not a head!\n", i); + return NULL; + } + + /* detach_buf clears data, so grab it now. */ + buf = vq->data[i]; + detach_buf(vq, i); + free(buf); + vq->last_used_idx++; + } + END_USE(vq); + return vq->num_free; +} + +EXPORT_SYMBOL_GPL(virtqueue_free_used); + void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f..19acc66 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -42,6 +42,10 @@ struct virtqueue { * vq: the struct virtqueue we're talking about. * len: the length written into the buffer * Returns NULL or the "data" token handed to add_buf. + * virtqueue_free_used: free all used buffers in the queue + * vq: the struct virtqueue we're talking about. + * free: free buf function from caller. + * Returns remaining capacity of the queue. * virtqueue_disable_cb: disable callbacks * vq: the struct virtqueue we're talking about. 
* Note that this is not necessarily synchronous, hence unreliable and only @@ -82,6 +86,8 @@ void virtqueue_kick(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); +int virtqueue_free_used(struct virtqueue *vq, void (*free)(void *buf)); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); Thanks Shirley ^ permalink raw reply related [flat|nested] 24+ messages in thread
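If an API along these lines were merged, the virtio_net TX path might use it roughly as below. free_xmit_skb() is a hypothetical callback invented for illustration, and per-device stats handling is left out since the proposed interface only passes the buffer; this is a sketch of the usage, not part of the posted patch:

/* Hypothetical callback handed to the proposed virtqueue_free_used(). */
static void free_xmit_skb(void *buf)
{
	struct sk_buff *skb = buf;

	pr_debug("Sent skb %p\n", skb);
	dev_kfree_skb_any(skb);
}

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int capacity;

	/* One pass over the used ring frees everything and reports the
	 * remaining capacity, replacing the free_old_xmit_skbs() loop. */
	capacity = virtqueue_free_used(vi->svq, free_xmit_skb);
	if (capacity < MAX_SKB_FRAGS + 2) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	xmit_skb(vi, skb);
	virtqueue_kick(vi->svq);
	skb_orphan(skb);
	nf_reset(skb);
	return NETDEV_TX_OK;
}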
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 0:30 ` Rusty Russell 2011-03-24 4:14 ` Shirley Ma @ 2011-03-24 14:28 ` Michael S. Tsirkin 2011-03-24 17:46 ` Shirley Ma 2011-03-25 4:50 ` Rusty Russell 1 sibling, 2 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-24 14:28 UTC (permalink / raw) To: Rusty Russell; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > With simply removing the notify here, it does help the case when TX > > overrun hits too often, for example for 1K message size, the single > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > OK, we'll be getting rid of the "kick on full", so please delete that on > all benchmarks. > > Now, does the capacity check before add_buf() still win anything? I > can't see how unless we have some weird bug. > > Once we've sorted that out, we should look at the more radical change > of publishing last_used and using that to intuit whether interrupts > should be sent. If we're not careful with ordering and barriers that > could introduce more bugs. Right. I am working on this, and trying to be careful. One thing I'm in doubt about: sometimes we just want to disable interrupts. Should still use flags in that case? I thought that if we make the published index 0 to vq->num - 1, then a special value in the index field could disable interrupts completely. We could even reuse the space for the flags field to stick the index in. Too complex? > Anything else on the optimization agenda I've missed? > > Thanks, > Rusty. Several other things I am looking at, wellcome cooperation: 1. It's probably a good idea to update avail index immediately instead of upon kick: for RX this might help parallelism with the host. 2. Adding an API to add a single buffer instead of s/g, seems to help a bit. 3. For TX sometimes we free a single buffer, sometimes a ton of them, which might make the transmit latency vary. It's probably a good idea to limit this, maybe free the minimal number possible to keep the device going without stops, maybe free up to MAX_SKB_FRAGS. 4. If the ring is full, we now notify right after the first entry is consumed. For TX this is suboptimal, we should try delaying the interrupt on host. More ideas, would be nice if someone can try them out: 1. We are allocating/freeing buffers for indirect descriptors. Use some kind of pool instead? And we could preformat part of the descriptor. 2. I didn't have time to work on virtio2 ideas presented at the kvm forum yet, any takers? -- MST ^ permalink raw reply [flat|nested] 24+ messages in thread
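Item 3 on the list above (bounding how many completed buffers are freed per transmit) could look something like the sketch below; the bound of 2 + MAX_SKB_FRAGS is one plausible choice, enough room for a worst-case packet, not a measured value:

/* Sketch: free at most 'limit' completed TX skbs per call so the cost
 * charged to each transmit stays roughly constant. */
static void free_old_xmit_skbs_limited(struct virtnet_info *vi,
				       unsigned int limit)
{
	struct sk_buff *skb;
	unsigned int len;

	while (limit-- && (skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);
		vi->dev->stats.tx_bytes += skb->len;
		vi->dev->stats.tx_packets++;
		dev_kfree_skb_any(skb);
	}
}

/* In start_xmit(): free just enough to make room for a worst-case packet. */
free_old_xmit_skbs_limited(vi, 2 + MAX_SKB_FRAGS);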
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 14:28 ` Michael S. Tsirkin @ 2011-03-24 17:46 ` Shirley Ma 2011-03-24 18:10 ` Michael S. Tsirkin 2011-03-25 4:51 ` Rusty Russell 2011-03-25 4:50 ` Rusty Russell 1 sibling, 2 replies; 24+ messages in thread From: Shirley Ma @ 2011-03-24 17:46 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Rusty Russell, Herbert Xu, davem, kvm, netdev On Thu, 2011-03-24 at 16:28 +0200, Michael S. Tsirkin wrote: > On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > > With simply removing the notify here, it does help the case when TX > > > overrun hits too often, for example for 1K message size, the single > > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > > > OK, we'll be getting rid of the "kick on full", so please delete that on > > all benchmarks. > > > > Now, does the capacity check before add_buf() still win anything? I > > can't see how unless we have some weird bug. > > > > Once we've sorted that out, we should look at the more radical change > > of publishing last_used and using that to intuit whether interrupts > > should be sent. If we're not careful with ordering and barriers that > > could introduce more bugs. > > Right. I am working on this, and trying to be careful. > One thing I'm in doubt about: sometimes we just want to > disable interrupts. Should still use flags in that case? > I thought that if we make the published index 0 to vq->num - 1, > then a special value in the index field could disable > interrupts completely. We could even reuse the space > for the flags field to stick the index in. Too complex? > > Anything else on the optimization agenda I've missed? > > > > Thanks, > > Rusty. > > Several other things I am looking at, wellcome cooperation: > 1. It's probably a good idea to update avail index > immediately instead of upon kick: for RX > this might help parallelism with the host. Is that possible to use the same idea for publishing last used idx to publish avail idx? Then we can save guest iowrite/exits. > 2. Adding an API to add a single buffer instead of s/g, > seems to help a bit. > > 3. For TX sometimes we free a single buffer, sometimes > a ton of them, which might make the transmit latency > vary. It's probably a good idea to limit this, > maybe free the minimal number possible to keep the device > going without stops, maybe free up to MAX_SKB_FRAGS. I am playing with it now, to collect more perf data to see what's the best value to free number of used buffers. > 4. If the ring is full, we now notify right after > the first entry is consumed. For TX this is suboptimal, > we should try delaying the interrupt on host. > More ideas, would be nice if someone can try them out: > 1. We are allocating/freeing buffers for indirect descriptors. > Use some kind of pool instead? > And we could preformat part of the descriptor. > 2. I didn't have time to work on virtio2 ideas presented > at the kvm forum yet, any takers? If I have time, I will look at this. Thanks Shirley ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 17:46 ` Shirley Ma @ 2011-03-24 18:10 ` Michael S. Tsirkin 2011-03-25 4:51 ` Rusty Russell 1 sibling, 0 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-24 18:10 UTC (permalink / raw) To: Shirley Ma; +Cc: Rusty Russell, Herbert Xu, davem, kvm, netdev On Thu, Mar 24, 2011 at 10:46:49AM -0700, Shirley Ma wrote: > On Thu, 2011-03-24 at 16:28 +0200, Michael S. Tsirkin wrote: > > On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > > > With simply removing the notify here, it does help the case when TX > > > > overrun hits too often, for example for 1K message size, the single > > > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > > > > > OK, we'll be getting rid of the "kick on full", so please delete that on > > > all benchmarks. > > > > > > Now, does the capacity check before add_buf() still win anything? I > > > can't see how unless we have some weird bug. > > > > > > Once we've sorted that out, we should look at the more radical change > > > of publishing last_used and using that to intuit whether interrupts > > > should be sent. If we're not careful with ordering and barriers that > > > could introduce more bugs. > > > > Right. I am working on this, and trying to be careful. > > One thing I'm in doubt about: sometimes we just want to > > disable interrupts. Should still use flags in that case? > > I thought that if we make the published index 0 to vq->num - 1, > > then a special value in the index field could disable > > interrupts completely. We could even reuse the space > > for the flags field to stick the index in. Too complex? > > > Anything else on the optimization agenda I've missed? > > > > > > Thanks, > > > Rusty. > > > > Several other things I am looking at, wellcome cooperation: > > 1. It's probably a good idea to update avail index > > immediately instead of upon kick: for RX > > this might help parallelism with the host. > Is that possible to use the same idea for publishing last used idx to > publish avail idx? Then we can save guest iowrite/exits. Yes, but unrelated to 1 above :) > > 2. Adding an API to add a single buffer instead of s/g, > > seems to help a bit. > > > > 3. For TX sometimes we free a single buffer, sometimes > > a ton of them, which might make the transmit latency > > vary. It's probably a good idea to limit this, > > maybe free the minimal number possible to keep the device > > going without stops, maybe free up to MAX_SKB_FRAGS. > > I am playing with it now, to collect more perf data to see what's the > best value to free number of used buffers. The best IMO is to keep the number of freed buffers constant so that we have more or less identical overhead for each packet. > > 4. If the ring is full, we now notify right after > > the first entry is consumed. For TX this is suboptimal, > > we should try delaying the interrupt on host. > > > More ideas, would be nice if someone can try them out: > > 1. We are allocating/freeing buffers for indirect descriptors. > > Use some kind of pool instead? > > And we could preformat part of the descriptor. > > 2. I didn't have time to work on virtio2 ideas presented > > at the kvm forum yet, any takers? > If I have time, I will look at this. > > Thanks > Shirley > ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 17:46 ` Shirley Ma 2011-03-24 18:10 ` Michael S. Tsirkin @ 2011-03-25 4:51 ` Rusty Russell 1 sibling, 0 replies; 24+ messages in thread From: Rusty Russell @ 2011-03-25 4:51 UTC (permalink / raw) To: Shirley Ma, Michael S. Tsirkin; +Cc: Herbert Xu, davem, kvm, netdev On Thu, 24 Mar 2011 10:46:49 -0700, Shirley Ma <mashirle@us.ibm.com> wrote: > On Thu, 2011-03-24 at 16:28 +0200, Michael S. Tsirkin wrote: > > Several other things I am looking at, wellcome cooperation: > > 1. It's probably a good idea to update avail index > > immediately instead of upon kick: for RX > > this might help parallelism with the host. > Is that possible to use the same idea for publishing last used idx to > publish avail idx? Then we can save guest iowrite/exits. Yes, it should be symmetrical. Test independently of course, but the same logic applies. Thanks! Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 14:28 ` Michael S. Tsirkin 2011-03-24 17:46 ` Shirley Ma @ 2011-03-25 4:50 ` Rusty Russell 2011-03-27 7:52 ` Michael S. Tsirkin 1 sibling, 1 reply; 24+ messages in thread From: Rusty Russell @ 2011-03-25 4:50 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Thu, 24 Mar 2011 16:28:22 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Thu, Mar 24, 2011 at 11:00:53AM +1030, Rusty Russell wrote: > > > With simply removing the notify here, it does help the case when TX > > > overrun hits too often, for example for 1K message size, the single > > > TCP_STREAM performance improved from 2.xGb/s to 4.xGb/s. > > > > OK, we'll be getting rid of the "kick on full", so please delete that on > > all benchmarks. > > > > Now, does the capacity check before add_buf() still win anything? I > > can't see how unless we have some weird bug. > > > > Once we've sorted that out, we should look at the more radical change > > of publishing last_used and using that to intuit whether interrupts > > should be sent. If we're not careful with ordering and barriers that > > could introduce more bugs. > > Right. I am working on this, and trying to be careful. > One thing I'm in doubt about: sometimes we just want to > disable interrupts. Should still use flags in that case? > I thought that if we make the published index 0 to vq->num - 1, > then a special value in the index field could disable > interrupts completely. We could even reuse the space > for the flags field to stick the index in. Too complex? Making the index free-running avoids the "full or empty" confusion, plus offers and extra debugging insight. I think that if they really want to disable interrrupts, the flag should still work, and when the client accepts the "publish last_idx" feature they are accepting that interrupts may be omitted if they haven't updated last_idx yet. > > Anything else on the optimization agenda I've missed? > > > > Thanks, > > Rusty. > > Several other things I am looking at, wellcome cooperation: > 1. It's probably a good idea to update avail index > immediately instead of upon kick: for RX > this might help parallelism with the host. Yes, once we've done everything else, we should measure this. It makes sense. > 2. Adding an API to add a single buffer instead of s/g, > seems to help a bit. This goes last, since it's kind of an ugly hack, but all internal to Linux if we decide it's a win. > 3. For TX sometimes we free a single buffer, sometimes > a ton of them, which might make the transmit latency > vary. It's probably a good idea to limit this, > maybe free the minimal number possible to keep the device > going without stops, maybe free up to MAX_SKB_FRAGS. This kind of heuristic is going to be quite variable depending on circumstance, I think, so it's a lot of work to make sure we get it right. > 4. If the ring is full, we now notify right after > the first entry is consumed. For TX this is suboptimal, > we should try delaying the interrupt on host. Lguest already does that: only sends an interrupt when it's run out of things to do. It does update the used ring, however, as it processes them. This seems sensible to me, but needs to be measured separately as well. > More ideas, would be nice if someone can try them out: > 1. We are allocating/freeing buffers for indirect descriptors. > Use some kind of pool instead? 
> And we could preformat part of the descriptor. We need some poolish mechanism for virtio_blk too; perhaps an allocation callback which both can use (virtio_blk to alloc from a pool, virtio_net to recycle?). Along similar lines to preformatting, we could actually try to prepend the skb_vnet_hdr to the vnet data, and use a single descriptor for the hdr and the first part of the packet. Though IIRC, qemu's virtio barfs if the first descriptor isn't just the hdr (barf...). > 2. I didn't have time to work on virtio2 ideas presented > at the kvm forum yet, any takers? I didn't even attend. But I think that virtio is moribund for the moment; there wasn't enough demand and it's clear that there are optimization unexplored in virtio1. Cheers, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
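The free-running index point Rusty makes is easiest to see with the arithmetic written out: if both indices increment without being reduced modulo the ring size, the number of entries in flight is simply their difference, so a full ring (difference == num) is distinguishable from an empty one (difference == 0). A minimal sketch, using the u16 index width of the vring layout:

/* Free-running u16 indices: wrap-safe subtraction gives entries in flight;
 * only the array lookup takes the index modulo the ring size. */
static inline u16 ring_in_flight(u16 avail_idx, u16 used_idx)
{
	return (u16)(avail_idx - used_idx);
}

static inline unsigned int ring_slot(u16 idx, unsigned int num)
{
	return idx % num;	/* map the free-running index onto a slot */
}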
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-25 4:50 ` Rusty Russell @ 2011-03-27 7:52 ` Michael S. Tsirkin 2011-04-04 6:13 ` Rusty Russell 0 siblings, 1 reply; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-27 7:52 UTC (permalink / raw) To: Rusty Russell; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Fri, Mar 25, 2011 at 03:20:46PM +1030, Rusty Russell wrote: > > 3. For TX sometimes we free a single buffer, sometimes > > a ton of them, which might make the transmit latency > > vary. It's probably a good idea to limit this, > > maybe free the minimal number possible to keep the device > > going without stops, maybe free up to MAX_SKB_FRAGS. > > This kind of heuristic is going to be quite variable depending on > circumstance, I think, so it's a lot of work to make sure we get it > right. Hmm, trying to keep the amount of work per descriptor constant seems to make sense though, no? Latency variations are not good for either RT uses or protocols such as TCP. > > 4. If the ring is full, we now notify right after > > the first entry is consumed. For TX this is suboptimal, > > we should try delaying the interrupt on host. > > Lguest already does that: only sends an interrupt when it's run out of > things to do. It does update the used ring, however, as it processes > them. There are many approaches here I suspect something like interrupt after half work is done might be better for parallelism. > > This seems sensible to me, but needs to be measured separately as well. Agree. > > More ideas, would be nice if someone can try them out: > > 1. We are allocating/freeing buffers for indirect descriptors. > > Use some kind of pool instead? > > And we could preformat part of the descriptor. > > We need some poolish mechanism for virtio_blk too; perhaps an allocation > callback which both can use (virtio_blk to alloc from a pool, virtio_net > to recycle?). BTW for recycling, need to be careful about numa effects: probably store cpu id and reallocate if we switch cpus ... (or noma nodes - unfortunately not always described correctly). > Along similar lines to preformatting, we could actually try to prepend > the skb_vnet_hdr to the vnet data, and use a single descriptor for the > hdr and the first part of the packet. > > Though IIRC, qemu's virtio barfs if the first descriptor isn't just the > hdr (barf...). Maybe we can try fixing this before adding more flags, then e.g. publish used flag can be resued to also tell us layout is flexible. Or just add a feature flag for that. > > 2. I didn't have time to work on virtio2 ideas presented > > at the kvm forum yet, any takers? > > I didn't even attend. Hmm, right. But what was presented there was discussed on list as well: a single R/W descriptor ring with valid bit instead of 2 rings + a descriptor array. > But I think that virtio is moribund for the > moment; there wasn't enough demand and it's clear that there are > optimization unexplored in virtio1. I agree absolutely that not all lessons has been learned, playing with different ring layouts would make at least an interesting paper IMO. -- MST ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-27 7:52 ` Michael S. Tsirkin @ 2011-04-04 6:13 ` Rusty Russell 0 siblings, 0 replies; 24+ messages in thread From: Rusty Russell @ 2011-04-04 6:13 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev On Sun, 27 Mar 2011 09:52:54 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > > Though IIRC, qemu's virtio barfs if the first descriptor isn't just the > > hdr (barf...). > > Maybe we can try fixing this before adding more flags, > then e.g. publish used flag can be resued to also > tell us layout is flexible. Or just add a feature flag for that. We should probably do this at some stage, yes. > > > 2. I didn't have time to work on virtio2 ideas presented > > > at the kvm forum yet, any takers? > > > > I didn't even attend. > > Hmm, right. But what was presented there was discussed on list as well: > a single R/W descriptor ring with valid bit instead of 2 rings > + a descriptor array. I'll be happy when we reach the point that the extra cacheline is hurting us :) Then we should do direct descriptors w/ a cookie as the value to hand back when finished. That seems to be close to optimal. > I agree absolutely that not all lessons has been learned, > playing with different ring layouts would make at least > an interesting paper IMO. Yes, I'd like to see the results... Thanks, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-22 11:36 ` Michael S. Tsirkin 2011-03-23 2:26 ` Shirley Ma @ 2011-03-24 0:16 ` Rusty Russell 2011-03-24 6:39 ` Michael S. Tsirkin 1 sibling, 1 reply; 24+ messages in thread From: Rusty Russell @ 2011-03-24 0:16 UTC (permalink / raw) To: Michael S. Tsirkin, Shirley Ma; +Cc: Herbert Xu, davem, kvm, netdev On Tue, 22 Mar 2011 13:36:50 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote: > > On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > > > + /* Drop packet instead of stop queue for better > > > performance > > > > */ > > > > > > > > I would like to see some justification as to why this is the right > > > > way to go and not just papering over the real problem. > > > > > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > > > which involves: > > > > > > 1. Guest enable callback: one memory barrier, interrupt flag set > > > > Missed this cost: for history reason, it also involves a guest exit from > > I/O write (PCI_QUEUE_NOTIFY). > > OK, after some research, it looks like the reason was the tx timer that > qemu used to use. So the hack of avoiding the add_buf call will > avoid this kick and so break these hosts. > I guess we can add a feature bit to detect a new host > and so avoid the kick. We are running low on feature bits > unfortunately, but just fo testing, could you quantify the difference > that this makes using the following patch: Performance would suffer for those ancient qemus if we didn't do this, but it wouldn't be fatal to them. I think we should just remove it; the standard certainly doesn't mention it. Cheers, Rusty. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/2] virtio_net: remove send completion interrupts and avoid TX queue overrun through packet drop 2011-03-24 0:16 ` Rusty Russell @ 2011-03-24 6:39 ` Michael S. Tsirkin 0 siblings, 0 replies; 24+ messages in thread From: Michael S. Tsirkin @ 2011-03-24 6:39 UTC (permalink / raw) To: Rusty Russell; +Cc: Shirley Ma, Herbert Xu, davem, kvm, netdev, Anthony Liguori On Thu, Mar 24, 2011 at 10:46:49AM +1030, Rusty Russell wrote: > On Tue, 22 Mar 2011 13:36:50 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote: > > On Mon, Mar 21, 2011 at 11:03:07AM -0700, Shirley Ma wrote: > > > On Fri, 2011-03-18 at 18:41 -0700, Shirley Ma wrote: > > > > > > + /* Drop packet instead of stop queue for better > > > > performance > > > > > */ > > > > > > > > > > I would like to see some justification as to why this is the right > > > > > way to go and not just papering over the real problem. > > > > > > > > Fair. KVM guest virtio_net TX queue stop/restart is pretty expensive, > > > > which involves: > > > > > > > > 1. Guest enable callback: one memory barrier, interrupt flag set > > > > > > Missed this cost: for history reason, it also involves a guest exit from > > > I/O write (PCI_QUEUE_NOTIFY). > > > > OK, after some research, it looks like the reason was the tx timer that > > qemu used to use. So the hack of avoiding the add_buf call will > > avoid this kick and so break these hosts. > > I guess we can add a feature bit to detect a new host > > and so avoid the kick. We are running low on feature bits > > unfortunately, but just fo testing, could you quantify the difference > > that this makes using the following patch: > > Performance would suffer for those ancient qemus if we didn't do this, > but it wouldn't be fatal to them. > > I think we should just remove it; the standard certainly doesn't mention > it. > > Cheers, > Rusty. I agree here. Anthony, agree? -- MST ^ permalink raw reply [flat|nested] 24+ messages in thread