From: Rusty Russell <rusty@rustcorp.com.au>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Sridhar Samudrala <sri@us.ibm.com>,
mst@redhat.com, netdev@vger.kernel.org
Subject: Re: [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net
Date: Wed, 16 Dec 2009 23:15:38 +1030 [thread overview]
Message-ID: <200912162315.38802.rusty@rustcorp.com.au> (raw)
In-Reply-To: <20091216025331.GA13935@gondor.apana.org.au>
On Wed, 16 Dec 2009 01:23:31 pm Herbert Xu wrote:
> On Wed, Dec 16, 2009 at 01:11:40PM +1030, Rusty Russell wrote:
> >
> > Thanks for the hint. They seem to use NAPI for xmit cleanup, so that's
> > what we should do? I'll try, but such a rewrite doesn't belong in 2.6.32.
>
> Well it depends. Real drivers can't touch the hardware so they're
> stuck with whatever the hardware does. For virtio we do have the
> flexibility of modifying the backend.
>
> Having said that, for existing backends that will signal when there
> is just a single free entry on the queue, something like NAPI could
> reduce the overhead associated with the IRQs.
OK, this is unfortunately untested, but I wanted to send it out tonight:
virtio_net: use NAPI for xmit (UNTESTED)
This is closer to the way tg3 and ixgbe do it: use the NAPI framework to
free transmitted packets. It neatens things a little as well.
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -47,6 +47,9 @@ struct virtnet_info
struct napi_struct napi;
unsigned int status;
+ /* We free packets and decide whether to restart xmit here. */
+ struct napi_struct xmit_napi;
+
/* Number of input buffers, and max we've ever had. */
unsigned int num, max;
@@ -60,6 +63,9 @@ struct virtnet_info
struct sk_buff_head recv;
struct sk_buff_head send;
+ /* Capacity left in xmit queue. */
+ unsigned int capacity;
+
/* Work struct for refilling if we run low on memory. */
struct delayed_work refill;
@@ -111,11 +117,8 @@ static void skb_xmit_done(struct virtque
{
struct virtnet_info *vi = svq->vdev->priv;
- /* Suppress further interrupts. */
- svq->vq_ops->disable_cb(svq);
-
/* We were probably waiting for more output buffers. */
- netif_wake_queue(vi->dev);
+ napi_schedule(&vi->xmit_napi);
}
static void receive_skb(struct net_device *dev, struct sk_buff *skb,
@@ -446,7 +449,7 @@ static unsigned int free_old_xmit_skbs(s
while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
pr_debug("Sent skb %p\n", skb);
- __skb_unlink(skb, &vi->send);
+ skb_unlink(skb, &vi->send);
vi->dev->stats.tx_bytes += skb->len;
vi->dev->stats.tx_packets++;
tot_sgs += skb_vnet_hdr(skb)->num_sg;
@@ -455,6 +458,23 @@ static unsigned int free_old_xmit_skbs(s
return tot_sgs;
}
+static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget)
+{
+ struct virtnet_info *vi =
+ container_of(xmit_napi, struct virtnet_info, xmit_napi);
+
+ if (netif_queue_stopped(vi->dev)) {
+ vi->capacity += free_old_xmit_skbs(vi);
+ if (vi->capacity >= 2 + MAX_SKB_FRAGS) {
+ /* Suppress further xmit interrupts. */
+ vi->svq->vq_ops->disable_cb(vi->svq);
+ napi_complete(xmit_napi);
+ netif_wake_queue(vi->dev);
+ }
+ }
+ return 1;
+}
+
static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
{
struct scatterlist sg[2+MAX_SKB_FRAGS];
@@ -509,34 +529,22 @@ static netdev_tx_t start_xmit(struct sk_
struct virtnet_info *vi = netdev_priv(dev);
int capacity;
-again:
- /* Free up any pending old buffers before queueing new ones. */
- free_old_xmit_skbs(vi);
-
/* Try to transmit */
+ skb_queue_head(&vi->send, skb);
capacity = xmit_skb(vi, skb);
/* This can happen with OOM and indirect buffers. */
if (unlikely(capacity < 0)) {
+ skb_unlink(skb, &vi->send);
netif_stop_queue(dev);
dev_warn(&dev->dev, "Unexpected full queue\n");
- if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
- vi->svq->vq_ops->disable_cb(vi->svq);
- netif_start_queue(dev);
- goto again;
- }
+ /* If we missed an interrupt, we let virtnet_xmit_poll deal. */
+ if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq)))
+ napi_schedule(&vi->xmit_napi);
return NETDEV_TX_BUSY;
}
vi->svq->vq_ops->kick(vi->svq);
-
- /*
- * Put new one in send queue. You'd expect we'd need this before
- * xmit_skb calls add_buf(), since the callback can be triggered
- * immediately after that. But since the callback just triggers
- * another call back here, normal network xmit locking prevents the
- * race.
- */
- __skb_queue_head(&vi->send, skb);
+ vi->capacity = capacity;
/* Don't wait up for transmitted skbs to be freed. */
skb_orphan(skb);
@@ -544,15 +552,16 @@ again:
/* Apparently nice girls don't return TX_BUSY; stop the queue
* before it gets out of hand. Naturally, this wastes entries. */
- if (capacity < 2+MAX_SKB_FRAGS) {
- netif_stop_queue(dev);
- if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
- /* More just got used, free them then recheck. */
- capacity += free_old_xmit_skbs(vi);
- if (capacity >= 2+MAX_SKB_FRAGS) {
- netif_start_queue(dev);
- vi->svq->vq_ops->disable_cb(vi->svq);
- }
+ if (unlikely(capacity < 2+MAX_SKB_FRAGS)) {
+ /* Free old skbs; might make more capacity. */
+ vi->capacity = capacity + free_old_xmit_skbs(vi);
+ if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) {
+ /* Make sure virtnet_xmit_poll sees updated capacity */
+ wmb();
+ netif_stop_queue(dev);
+ /* Missed xmit irq? virtnet_xmit_poll will deal. */
+ if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq)))
+ napi_schedule(&vi->xmit_napi);
}
}
@@ -590,6 +599,7 @@ static int virtnet_open(struct net_devic
struct virtnet_info *vi = netdev_priv(dev);
napi_enable(&vi->napi);
+ napi_enable(&vi->xmit_napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets
@@ -652,6 +662,7 @@ static int virtnet_close(struct net_devi
struct virtnet_info *vi = netdev_priv(dev);
napi_disable(&vi->napi);
+ napi_disable(&vi->xmit_napi);
return 0;
}
@@ -883,6 +894,7 @@ static int virtnet_probe(struct virtio_d
/* Set up our device-specific information */
vi = netdev_priv(dev);
netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
+ netif_napi_add(dev, &vi->xmit_napi, virtnet_xmit_poll, 64);
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
next prev parent reply other threads:[~2009-12-16 12:45 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-08 22:50 [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net Sridhar Samudrala
2009-12-13 12:25 ` Herbert Xu
2009-12-13 23:40 ` Michael S. Tsirkin
2009-12-15 14:42 ` Herbert Xu
2009-12-15 16:26 ` Sridhar Samudrala
2009-12-16 1:21 ` Herbert Xu
2009-12-15 23:32 ` Michael S. Tsirkin
2009-12-16 1:58 ` Herbert Xu
2009-12-16 4:37 ` Rusty Russell
2009-12-16 10:37 ` Michael S. Tsirkin
2009-12-16 2:41 ` Rusty Russell
2009-12-16 2:53 ` Herbert Xu
2009-12-16 12:45 ` Rusty Russell [this message]
2009-12-16 13:22 ` Michael S. Tsirkin
2009-12-16 13:35 ` Herbert Xu
2009-12-16 13:38 ` Michael S. Tsirkin
2009-12-16 13:48 ` Herbert Xu
2009-12-17 2:02 ` Rusty Russell
2009-12-17 9:25 ` Michael S. Tsirkin
2009-12-18 1:55 ` Rusty Russell
2009-12-16 13:30 ` Herbert Xu
2009-12-17 1:43 ` Sridhar Samudrala
2009-12-17 3:12 ` Herbert Xu
2009-12-17 5:02 ` Sridhar Samudrala
2009-12-17 3:15 ` Herbert Xu
2009-12-17 5:05 ` Sridhar Samudrala
2009-12-17 6:28 ` Herbert Xu
2009-12-17 6:45 ` Sridhar Samudrala
2009-12-17 10:03 ` Krishna Kumar2
2009-12-17 11:27 ` Jarek Poplawski
2009-12-17 11:45 ` Herbert Xu
2009-12-17 11:49 ` Herbert Xu
2009-12-17 12:08 ` Herbert Xu
2009-12-17 12:27 ` Krishna Kumar2
2009-12-17 12:42 ` Jarek Poplawski
2009-12-17 12:56 ` Herbert Xu
2009-12-17 13:22 ` Krishna Kumar2
2009-12-17 13:04 ` Krishna Kumar2
2009-12-17 13:44 ` Herbert Xu
2009-12-17 14:35 ` Krishna Kumar2
2009-12-17 14:36 ` Herbert Xu
2009-12-17 21:50 ` Sridhar Samudrala
2009-12-17 22:28 ` Sridhar Samudrala
2009-12-17 22:41 ` Jarek Poplawski
2009-12-18 13:46 ` Krishna Kumar2
2009-12-18 19:13 ` Sridhar Samudrala
2009-12-17 11:59 ` Krishna Kumar2
2009-12-17 12:19 ` Jarek Poplawski
2009-12-17 11:56 ` Krishna Kumar2
2009-12-17 13:17 ` Jarek Poplawski
2009-12-17 14:10 ` Krishna Kumar2
2009-12-17 14:16 ` Herbert Xu
2009-12-16 17:42 ` Sridhar Samudrala
-- strict thread matches above, loose matches on Subject: below --
2009-12-17 11:20 Krishna Kumar
2009-12-17 19:28 ` Jarek Poplawski
[not found] <20091217111219.9809.27432.sendpatchset@krkumar2.in.ibm.com>
[not found] ` <20091217123153.GA31131@gondor.apana.org.au>
2009-12-17 12:56 ` Krishna Kumar2
2009-12-17 13:40 ` Herbert Xu
2009-12-17 13:56 ` Krishna Kumar2
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200912162315.38802.rusty@rustcorp.com.au \
--to=rusty@rustcorp.com.au \
--cc=herbert@gondor.apana.org.au \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=sri@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.