All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Rusty Russell <rusty@rustcorp.com.au>
Cc: Herbert Xu <herbert@gondor.apana.org.au>,
	Sridhar Samudrala <sri@us.ibm.com>,
	netdev@vger.kernel.org
Subject: Re: [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net
Date: Thu, 17 Dec 2009 11:25:31 +0200	[thread overview]
Message-ID: <20091217092531.GA4905@redhat.com> (raw)
In-Reply-To: <200912171232.26743.rusty@rustcorp.com.au>

On Thu, Dec 17, 2009 at 12:32:26PM +1030, Rusty Russell wrote:
> On Wed, 16 Dec 2009 11:52:18 pm Michael S. Tsirkin wrote:
> > On Wed, Dec 16, 2009 at 11:15:38PM +1030, Rusty Russell wrote:
> > > +	struct virtnet_info *vi =
> > > +		container_of(xmit_napi, struct virtnet_info, xmit_napi);
> > > +
> > > +	if (netif_queue_stopped(vi->dev)) {
> > 
> > I am a bit concerned here: for example, on link down
> > you do netif_stop_queue, and start on link up.
> > So is it enough to check netif_queue_stopped
> > to verify that tx is not running and that this is because
> > it was out of capacity?
> > 
> > It would be very bad if this run in parallel with TX ...
> 
> Yeah, I wasn't happy.  This version uses the tx lock (we're single-queued,
> so I used the __ version)
> 
> virtio_net: use NAPI for xmit (UNTESTED)
> 
> This is closer to the way tg3 and ixgbe do it: use the NAPI framework to
> free transmitted packets.  It neatens things a little as well.
> 
> Changes since last version:
> 
> 1) Use the tx lock for the xmit_poll to synchronize against
>    start_xmit; it might be overkill, but it's simple.
> 2) Don't wake queue if the carrier is gone.
> 
> (Note: a side effect of this is that we are lazier in freeing old xmit skbs.
>  This might be a slight win).
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

That's very clean. Some questions below:

> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -47,6 +47,9 @@ struct virtnet_info
>  	struct napi_struct napi;
>  	unsigned int status;
>  
> +	/* We free packets and decide whether to restart xmit here. */
> +	struct napi_struct xmit_napi;
> +
>  	/* Number of input buffers, and max we've ever had. */
>  	unsigned int num, max;
>  
> @@ -60,6 +63,9 @@ struct virtnet_info
>  	struct sk_buff_head recv;
>  	struct sk_buff_head send;
>  
> +	/* Capacity left in xmit queue. */
> +	unsigned int capacity;
> +
>  	/* Work struct for refilling if we run low on memory. */
>  	struct delayed_work refill;
>  
> @@ -111,11 +117,8 @@ static void skb_xmit_done(struct virtque
>  {
>  	struct virtnet_info *vi = svq->vdev->priv;
>  
> -	/* Suppress further interrupts. */
> -	svq->vq_ops->disable_cb(svq);
> -
>  	/* We were probably waiting for more output buffers. */
> -	netif_wake_queue(vi->dev);
> +	napi_schedule(&vi->xmit_napi);
>  }
>  
>  static void receive_skb(struct net_device *dev, struct sk_buff *skb,
> @@ -455,6 +458,29 @@ static unsigned int free_old_xmit_skbs(s
>  	return tot_sgs;
>  }
>  
> +static int virtnet_xmit_poll(struct napi_struct *xmit_napi, int budget)
> +{
> +	struct virtnet_info *vi =
> +		container_of(xmit_napi, struct virtnet_info, xmit_napi);
> +
> +	/* Don't access vq/capacity at same time as start_xmit. */
> +	__netif_tx_lock(netdev_get_tx_queue(vi->dev, 0), smp_processor_id());

So now that we are locking, we could build a variant of this
without NAPI (maybe with trylock: we can't spin on xmit lock
from hard irq context, can we?)? Possibly, if we do, that would be
a small enough change to be applicable in 2.6.32.

> +
> +	vi->capacity += free_old_xmit_skbs(vi);

Should we build a variant of free_old_xmit_skbs
that takes a budget, to avoid starving others
while we poll the vq?

> +	if (vi->capacity >= 2 + MAX_SKB_FRAGS) {
> +		/* Suppress further xmit interrupts. */
> +		vi->svq->vq_ops->disable_cb(vi->svq);
> +		napi_complete(xmit_napi);
> +
> +		/* Don't wake it if link is down. */
> +		if (likely(netif_carrier_ok(vi->vdev)))
> +			netif_wake_queue(vi->dev);
> +	}
> +
> +	__netif_tx_unlock(netdev_get_tx_queue(vi->dev, 0));
> +	return 1;
> +}
> +
>  static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
>  {
>  	struct scatterlist sg[2+MAX_SKB_FRAGS];
> @@ -509,10 +535,6 @@ static netdev_tx_t start_xmit(struct sk_
>  	struct virtnet_info *vi = netdev_priv(dev);
>  	int capacity;
>  
> -again:
> -	/* Free up any pending old buffers before queueing new ones. */
> -	free_old_xmit_skbs(vi);
> -
>  	/* Try to transmit */
>  	capacity = xmit_skb(vi, skb);
>  
> @@ -520,14 +542,13 @@ again:
>  	if (unlikely(capacity < 0)) {
>  		netif_stop_queue(dev);
>  		dev_warn(&dev->dev, "Unexpected full queue\n");
> -		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
> -			vi->svq->vq_ops->disable_cb(vi->svq);
> -			netif_start_queue(dev);
> -			goto again;
> -		}
> +		/* If we missed an interrupt, we let virtnet_xmit_poll deal. */
> +		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq)))
> +			napi_schedule(&vi->xmit_napi);
>  		return NETDEV_TX_BUSY;
>  	}
>  	vi->svq->vq_ops->kick(vi->svq);
> +	vi->capacity = capacity;
>  
>  	/*
>  	 * Put new one in send queue.  You'd expect we'd need this before
> @@ -545,14 +566,13 @@ again:
>  	/* Apparently nice girls don't return TX_BUSY; stop the queue
>  	 * before it gets out of hand.  Naturally, this wastes entries. */
>  	if (capacity < 2+MAX_SKB_FRAGS) {
> -		netif_stop_queue(dev);
> -		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
> -			/* More just got used, free them then recheck. */
> -			capacity += free_old_xmit_skbs(vi);
> -			if (capacity >= 2+MAX_SKB_FRAGS) {
> -				netif_start_queue(dev);
> -				vi->svq->vq_ops->disable_cb(vi->svq);
> -			}
> +		/* Free old skbs; might make more capacity. */
> +		vi->capacity = capacity + free_old_xmit_skbs(vi);
> +		if (unlikely(vi->capacity < 2+MAX_SKB_FRAGS)) {
> +			netif_stop_queue(dev);
> +			/* Missed xmit irq? virtnet_xmit_poll will deal. */
> +			if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq)))
> +				napi_schedule(&vi->xmit_napi);
>  		}
>  	}
>  
> @@ -590,6 +610,7 @@ static int virtnet_open(struct net_devic
>  	struct virtnet_info *vi = netdev_priv(dev);
>  
>  	napi_enable(&vi->napi);
> +	napi_enable(&vi->xmit_napi);
>  
>  	/* If all buffers were filled by other side before we napi_enabled, we
>  	 * won't get another interrupt, so process any outstanding packets
> @@ -652,6 +673,7 @@ static int virtnet_close(struct net_devi
>  	struct virtnet_info *vi = netdev_priv(dev);
>  
>  	napi_disable(&vi->napi);
> +	napi_disable(&vi->xmit_napi);
>  
>  	return 0;
>  }
> @@ -818,9 +840,13 @@ static void virtnet_update_status(struct
>  
>  	if (vi->status & VIRTIO_NET_S_LINK_UP) {
>  		netif_carrier_on(vi->dev);
> -		netif_wake_queue(vi->dev);
> +		/* Make sure virtnet_xmit_poll sees carrier enabled. */
> +		wmb();

I think this should be smp_wmb(): we are not synchronising with hardware
here. Right?

> +		napi_schedule(&vi->xmit_napi);
>  	} else {
>  		netif_carrier_off(vi->dev);
> +		/* Make sure virtnet_xmit_poll sees carrier disabled. */
> +		wmb();

And here.

>  		netif_stop_queue(vi->dev);
>  	}
>  }
> @@ -883,6 +909,7 @@ static int virtnet_probe(struct virtio_d
>  	/* Set up our device-specific information */
>  	vi = netdev_priv(dev);
>  	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
> +	netif_napi_add(dev, &vi->xmit_napi, virtnet_xmit_poll, 64);
>  	vi->dev = dev;
>  	vi->vdev = vdev;
>  	vdev->priv = vi;

  reply	other threads:[~2009-12-17  9:28 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-08 22:50 [RFC PATCH] Regression in linux 2.6.32 virtio_net seen with vhost-net Sridhar Samudrala
2009-12-13 12:25 ` Herbert Xu
2009-12-13 23:40   ` Michael S. Tsirkin
2009-12-15 14:42     ` Herbert Xu
2009-12-15 16:26       ` Sridhar Samudrala
2009-12-16  1:21         ` Herbert Xu
2009-12-15 23:32       ` Michael S. Tsirkin
2009-12-16  1:58         ` Herbert Xu
2009-12-16  4:37         ` Rusty Russell
2009-12-16 10:37           ` Michael S. Tsirkin
2009-12-16  2:41   ` Rusty Russell
2009-12-16  2:53     ` Herbert Xu
2009-12-16 12:45       ` Rusty Russell
2009-12-16 13:22         ` Michael S. Tsirkin
2009-12-16 13:35           ` Herbert Xu
2009-12-16 13:38             ` Michael S. Tsirkin
2009-12-16 13:48               ` Herbert Xu
2009-12-17  2:02           ` Rusty Russell
2009-12-17  9:25             ` Michael S. Tsirkin [this message]
2009-12-18  1:55               ` Rusty Russell
2009-12-16 13:30         ` Herbert Xu
2009-12-17  1:43         ` Sridhar Samudrala
2009-12-17  3:12           ` Herbert Xu
2009-12-17  5:02             ` Sridhar Samudrala
2009-12-17  3:15           ` Herbert Xu
2009-12-17  5:05             ` Sridhar Samudrala
2009-12-17  6:28               ` Herbert Xu
2009-12-17  6:45                 ` Sridhar Samudrala
2009-12-17 10:03                   ` Krishna Kumar2
2009-12-17 11:27                     ` Jarek Poplawski
2009-12-17 11:45                       ` Herbert Xu
2009-12-17 11:49                         ` Herbert Xu
2009-12-17 12:08                           ` Herbert Xu
2009-12-17 12:27                             ` Krishna Kumar2
2009-12-17 12:42                               ` Jarek Poplawski
2009-12-17 12:56                                 ` Herbert Xu
2009-12-17 13:22                                   ` Krishna Kumar2
2009-12-17 13:04                                 ` Krishna Kumar2
2009-12-17 13:44                               ` Herbert Xu
2009-12-17 14:35                                 ` Krishna Kumar2
2009-12-17 14:36                                   ` Herbert Xu
2009-12-17 21:50                                     ` Sridhar Samudrala
2009-12-17 22:28                                       ` Sridhar Samudrala
2009-12-17 22:41                                         ` Jarek Poplawski
2009-12-18 13:46                                       ` Krishna Kumar2
2009-12-18 19:13                                       ` Sridhar Samudrala
2009-12-17 11:59                         ` Krishna Kumar2
2009-12-17 12:19                         ` Jarek Poplawski
2009-12-17 11:56                       ` Krishna Kumar2
2009-12-17 13:17                         ` Jarek Poplawski
2009-12-17 14:10                           ` Krishna Kumar2
2009-12-17 14:16                             ` Herbert Xu
2009-12-16 17:42     ` Sridhar Samudrala
  -- strict thread matches above, loose matches on Subject: below --
2009-12-17 11:20 Krishna Kumar
2009-12-17 19:28 ` Jarek Poplawski
     [not found] <20091217111219.9809.27432.sendpatchset@krkumar2.in.ibm.com>
     [not found] ` <20091217123153.GA31131@gondor.apana.org.au>
2009-12-17 12:56   ` Krishna Kumar2
2009-12-17 13:40     ` Herbert Xu
2009-12-17 13:56       ` Krishna Kumar2

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091217092531.GA4905@redhat.com \
    --to=mst@redhat.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=netdev@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=sri@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.