From: Rusty Russell <rusty@rustcorp.com.au>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>,
Sridhar Samudrala <sri@us.ibm.com>,
netdev@vger.kernel.org
Subject: Re: [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net
Date: Thu, 17 Dec 2009 12:32:26 +1030 [thread overview]
Message-ID: <200912171232.26743.rusty@rustcorp.com.au> (raw)
In-Reply-To: <20091216132217.GA29494@redhat.com>
On Wed, 16 Dec 2009 11:52:18 pm Michael S. Tsirkin wrote:
> On Wed, Dec 16, 2009 at 11:15:38PM +1030, Rusty Russell wrote:
> > + struct virtnet_info *vi =
> > + container_of(xmit_napi, struct virtnet_info, xmit_napi);
> > +
> > + if (netif_queue_stopped(vi->dev)) {
>
> I am a bit concerned here: for example, on link down
> you do netif_stop_queue, and start on link up.
> So is it enough to check netif_queue_stopped
> to verify that tx is not running and that this is because
> it was out of capacity?
>
> It would be very bad if this run in parallel with TX ...
Yeah, I wasn't happy. This version uses the tx lock (we're single-queued,
so I used the __netif_tx_lock() version).
virtio_net: use NAPI for xmit (UNTESTED)
This is closer to the way tg3 and ixgbe do it: use the NAPI framework to
free transmitted packets. It neatens things a little as well.
Changes since last version:
1) Use the tx lock for the xmit_poll to synchronize against
start_xmit; it might be overkill, but it's simple.
2) Don't wake queue if the carrier is gone.
(Note: a side effect of this is that we are lazier in freeing old xmit skbs.
This might be a slight win).
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -47,6 +47,9 @@ struct virtnet_info
struct napi_struct napi;
unsigned int status;
+ /* We free packets and decide whether to restart xmit here. */
+ struct napi_struct xmit_napi;
+
/* Number of input buffers, and max we've ever had. */
unsigned int num, max;
@@ -60,6 +63,9 @@ struct virtnet_info
struct sk_buff_head recv;
struct sk_buff_head send;
+ /* Capacity left in xmit queue. */
+ unsigned int capacity;
+
/* Work struct for refilling if we run low on memory. */
struct delayed_work refill;
@@ -111,11 +117,8 @@ static void skb_xmit_done(struct virtque
{
struct virtnet_info *vi = svq->vdev->priv;
- /* Suppress further interrupts. */
- svq->vq_ops->disable_cb(svq);
-
/* We were probably waiting for more output buffers. */
- netif_wake_queue(vi->dev);
+ napi_schedule(&vi->xmit_napi);
}
static void receive_skb(struct net_device *dev, struct sk_buff *skb,
@@ -455,6 +458,29 @@ static unsigned int free_old_xmit_skbs(s
return tot_sgs;
}
+/* NAPI xmit poll: free sent skbs, wake the queue once a full skb fits. */
+static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget)
+{
+ struct virtnet_info *vi =
+ container_of(xmit_napi, struct virtnet_info, xmit_napi);
+
+ /* Don't access vq/capacity at same time as start_xmit. */
+ __netif_tx_lock(netdev_get_tx_queue(vi->dev, 0), smp_processor_id());
+
+ vi->capacity += free_old_xmit_skbs(vi);
+ if (vi->capacity >= 2 + MAX_SKB_FRAGS) {
+ /* Suppress further xmit interrupts. */
+ vi->svq->vq_ops->disable_cb(vi->svq);
+ napi_complete(xmit_napi);
+ /* Don't wake it if link is down (carrier is on the netdev). */
+ if (likely(netif_carrier_ok(vi->dev)))
+ netif_wake_queue(vi->dev);
+ }
+
+ __netif_tx_unlock(netdev_get_tx_queue(vi->dev, 0));
+ return 1;
+}
+
static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
{
struct scatterlist sg[2+MAX_SKB_FRAGS];
@@ -509,10 +535,6 @@ static netdev_tx_t start_xmit(struct sk_
struct virtnet_info *vi = netdev_priv(dev);
int capacity;
-again:
- /* Free up any pending old buffers before queueing new ones. */
- free_old_xmit_skbs(vi);
-
/* Try to transmit */
capacity = xmit_skb(vi, skb);
@@ -520,14 +542,13 @@ again:
if (unlikely(capacity < 0)) {
netif_stop_queue(dev);
dev_warn(&dev->dev, "Unexpected full queue\n");
- if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
- vi->svq->vq_ops->disable_cb(vi->svq);
- netif_start_queue(dev);
- goto again;
- }
+ /* If we missed an interrupt, we let virtnet_xmit_poll deal. */
+ if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq)))
+ napi_schedule(&vi->xmit_napi);
return NETDEV_TX_BUSY;
}
vi->svq->vq_ops->kick(vi->svq);
+ vi->capacity = capacity;
/*
* Put new one in send queue. You'd expect we'd need this before
@@ -545,14 +566,13 @@ again:
/* Apparently nice girls don't return TX_BUSY; stop the queue
* before it gets out of hand. Naturally, this wastes entries. */
if (capacity < 2+MAX_SKB_FRAGS) {
- netif_stop_queue(dev);
- if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
- /* More just got used, free them then recheck. */
- capacity += free_old_xmit_skbs(vi);
- if (capacity >= 2+MAX_SKB_FRAGS) {
- netif_start_queue(dev);
- vi->svq->vq_ops->disable_cb(vi->svq);
- }
+ /* Free old skbs; might make more capacity. */
+ vi->capacity = capacity + free_old_xmit_skbs(vi);
+ if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) {
+ netif_stop_queue(dev);
+ /* Missed xmit irq? virtnet_xmit_poll will deal. */
+ if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq)))
+ napi_schedule(&vi->xmit_napi);
}
}
@@ -590,6 +610,7 @@ static int virtnet_open(struct net_devic
struct virtnet_info *vi = netdev_priv(dev);
napi_enable(&vi->napi);
+ napi_enable(&vi->xmit_napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets
@@ -652,6 +673,7 @@ static int virtnet_close(struct net_devi
struct virtnet_info *vi = netdev_priv(dev);
napi_disable(&vi->napi);
+ napi_disable(&vi->xmit_napi);
return 0;
}
@@ -818,9 +840,13 @@ static void virtnet_update_status(struct
if (vi->status & VIRTIO_NET_S_LINK_UP) {
netif_carrier_on(vi->dev);
- netif_wake_queue(vi->dev);
+ /* Make sure virtnet_xmit_poll sees carrier enabled. */
+ wmb();
+ napi_schedule(&vi->xmit_napi);
} else {
netif_carrier_off(vi->dev);
+ /* Make sure virtnet_xmit_poll sees carrier disabled. */
+ wmb();
netif_stop_queue(vi->dev);
}
}
@@ -883,6 +909,7 @@ static int virtnet_probe(struct virtio_d
/* Set up our device-specific information */
vi = netdev_priv(dev);
netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
+ netif_napi_add(dev, &vi->xmit_napi, virtnet_xmit_poll, 64);
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
next prev parent reply other threads:[~2009-12-17 2:02 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-08 22:50 [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net Sridhar Samudrala
2009-12-13 12:25 ` Herbert Xu
2009-12-13 23:40 ` Michael S. Tsirkin
2009-12-15 14:42 ` Herbert Xu
2009-12-15 16:26 ` Sridhar Samudrala
2009-12-16 1:21 ` Herbert Xu
2009-12-15 23:32 ` Michael S. Tsirkin
2009-12-16 1:58 ` Herbert Xu
2009-12-16 4:37 ` Rusty Russell
2009-12-16 10:37 ` Michael S. Tsirkin
2009-12-16 2:41 ` Rusty Russell
2009-12-16 2:53 ` Herbert Xu
2009-12-16 12:45 ` Rusty Russell
2009-12-16 13:22 ` Michael S. Tsirkin
2009-12-16 13:35 ` Herbert Xu
2009-12-16 13:38 ` Michael S. Tsirkin
2009-12-16 13:48 ` Herbert Xu
2009-12-17 2:02 ` Rusty Russell [this message]
2009-12-17 9:25 ` Michael S. Tsirkin
2009-12-18 1:55 ` Rusty Russell
2009-12-16 13:30 ` Herbert Xu
2009-12-17 1:43 ` Sridhar Samudrala
2009-12-17 3:12 ` Herbert Xu
2009-12-17 5:02 ` Sridhar Samudrala
2009-12-17 3:15 ` Herbert Xu
2009-12-17 5:05 ` Sridhar Samudrala
2009-12-17 6:28 ` Herbert Xu
2009-12-17 6:45 ` Sridhar Samudrala
2009-12-17 10:03 ` Krishna Kumar2
2009-12-17 11:27 ` Jarek Poplawski
2009-12-17 11:45 ` Herbert Xu
2009-12-17 11:49 ` Herbert Xu
2009-12-17 12:08 ` Herbert Xu
2009-12-17 12:27 ` Krishna Kumar2
2009-12-17 12:42 ` Jarek Poplawski
2009-12-17 12:56 ` Herbert Xu
2009-12-17 13:22 ` Krishna Kumar2
2009-12-17 13:04 ` Krishna Kumar2
2009-12-17 13:44 ` Herbert Xu
2009-12-17 14:35 ` Krishna Kumar2
2009-12-17 14:36 ` Herbert Xu
2009-12-17 21:50 ` Sridhar Samudrala
2009-12-17 22:28 ` Sridhar Samudrala
2009-12-17 22:41 ` Jarek Poplawski
2009-12-18 13:46 ` Krishna Kumar2
2009-12-18 19:13 ` Sridhar Samudrala
2009-12-17 11:59 ` Krishna Kumar2
2009-12-17 12:19 ` Jarek Poplawski
2009-12-17 11:56 ` Krishna Kumar2
2009-12-17 13:17 ` Jarek Poplawski
2009-12-17 14:10 ` Krishna Kumar2
2009-12-17 14:16 ` Herbert Xu
2009-12-16 17:42 ` Sridhar Samudrala
-- strict thread matches above, loose matches on Subject: below --
2009-12-17 11:20 Krishna Kumar
2009-12-17 19:28 ` Jarek Poplawski
[not found] <20091217111219.9809.27432.sendpatchset@krkumar2.in.ibm.com>
[not found] ` <20091217123153.GA31131@gondor.apana.org.au>
2009-12-17 12:56 ` Krishna Kumar2
2009-12-17 13:40 ` Herbert Xu
2009-12-17 13:56 ` Krishna Kumar2
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200912171232.26743.rusty@rustcorp.com.au \
--to=rusty@rustcorp.com.au \
--cc=herbert@gondor.apana.org.au \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=sri@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).