All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: "Michael S. Tsirkin" <mst@redhat.com>, linux-kernel@vger.kernel.org
Cc: netdev@vger.kernel.org, Eric Dumazet <edumazet@google.com>,
	Michael Dalton <mwdalton@google.com>,
	virtualization@lists.linux-foundation.org
Subject: Re: [PATCH 1/2] virtio_net: fix error handling for mergeable buffers
Date: Thu, 28 Nov 2013 11:14:45 +0800	[thread overview]
Message-ID: <5296B525.5060509@redhat.com> (raw)
In-Reply-To: <1385569684-26595-1-git-send-email-mst@redhat.com>

On 11/28/2013 12:31 AM, Michael S. Tsirkin wrote:
> Eric Dumazet noticed that if we encounter an error
> when processing a mergeable buffer, we don't
> dequeue all of the buffers from this packet,
> the result is almost sure to be loss of networking.
>
> Jason Wang noticed that we also leak a page and that we don't decrement
> the rq buf count, so we won't repost buffers (a resource leak).
>
> Fix both issues, and also make the logic a bit more
> robust against device errors by not looping when e.g. because of a leak
> like the one we are fixing here the number of buffers is 0.
>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Michael Dalton <mwdalton@google.com>
> Reported-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>
> Note: this bugfix is needed on stable too, but backport
> might not be trivial.
> I'll send a backport for stable separately.

That will be fine.
>
>  drivers/net/virtio_net.c | 84 ++++++++++++++++++++++++++++++------------------
>  1 file changed, 52 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7bab4de..0e6ea69 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -299,41 +299,53 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
>  	return skb;
>  }
>  
> -static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
> +static struct sk_buff *receive_mergeable(struct net_device *dev,
> +					 struct receive_queue *rq,
> +					 void *buf,
> +					 unsigned int len)
>  {
> -	struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
> +	struct skb_vnet_hdr *hdr = buf;
> +	int num_buf = hdr->mhdr.num_buffers;
> +	struct page *page = virt_to_head_page(buf);
> +	int offset = buf - page_address(page);
> +	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
> +					       MERGE_BUFFER_LEN);
>  	struct sk_buff *curr_skb = head_skb;
> -	char *buf;
> -	struct page *page;
> -	int num_buf, len, offset;
>  
> -	num_buf = hdr->mhdr.num_buffers;
> +	if (unlikely(!curr_skb))
> +		goto err_skb;
> +
>  	while (--num_buf) {
> -		int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
> +		int num_skb_frags;
> +
>  		buf = virtqueue_get_buf(rq->vq, &len);
>  		if (unlikely(!buf)) {
> -			pr_debug("%s: rx error: %d buffers missing\n",
> -				 head_skb->dev->name, hdr->mhdr.num_buffers);
> -			head_skb->dev->stats.rx_length_errors++;
> -			return -EINVAL;
> +			pr_debug("%s: rx error: %d buffers out of %d missing\n",
> +				 dev->name, num_buf, hdr->mhdr.num_buffers);
> +			dev->stats.rx_length_errors++;
> +			goto err_buf;

Not sure it's correct here. Since we break immediately in err_buf if a
buffer is missing, rx_length_errors will be miscounted.

Maybe an ERR_PTR(-EINVAL) is better.
>  		}
>  		if (unlikely(len > MERGE_BUFFER_LEN)) {
>  			pr_debug("%s: rx error: merge buffer too long\n",
> -				 head_skb->dev->name);
> +				 dev->name);
>  			len = MERGE_BUFFER_LEN;
>  		}
> +
> +		page = virt_to_head_page(buf);
> +		--rq->num;
> +
> +		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
>  		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
>  			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
> -			if (unlikely(!nskb)) {
> -				head_skb->dev->stats.rx_dropped++;
> -				return -ENOMEM;
> -			}
> +
> +			if (unlikely(!nskb))
> +				goto err_skb;
>  			if (curr_skb == head_skb)
>  				skb_shinfo(curr_skb)->frag_list = nskb;
>  			else
>  				curr_skb->next = nskb;
> -			curr_skb = nskb;
>  			head_skb->truesize += nskb->truesize;
> +			curr_skb = nskb;

This change seems unnecessary.

Otherwise looks good.
>  			num_skb_frags = 0;
>  		}
>  		if (curr_skb != head_skb) {
> @@ -341,8 +353,7 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
>  			head_skb->len += len;
>  			head_skb->truesize += MERGE_BUFFER_LEN;
>  		}
> -		page = virt_to_head_page(buf);
> -		offset = buf - (char *)page_address(page);
> +		offset = buf - page_address(page);
>  		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
>  			put_page(page);
>  			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
> @@ -351,9 +362,28 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
>  			skb_add_rx_frag(curr_skb, num_skb_frags, page,
>  					offset, len, MERGE_BUFFER_LEN);
>  		}
> +	}
> +
> +	return head_skb;
> +
> +err_skb:
> +	put_page(page);
> +err_buf:
> +	dev->stats.rx_dropped++;
> +	dev_kfree_skb(head_skb);
> +	while (--num_buf) {
> +		buf = virtqueue_get_buf(rq->vq, &len);
> +		if (unlikely(!buf)) {
> +			pr_debug("%s: rx error: %d buffers missing\n",
> +				 dev->name, num_buf);
> +			dev->stats.rx_length_errors++;
> +			break;
> +		}
> +		page = virt_to_head_page(buf);
> +		put_page(page);
>  		--rq->num;
>  	}
> -	return 0;
> +	return NULL;
>  }
>  
>  static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
> @@ -382,19 +412,9 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
>  		len -= sizeof(struct virtio_net_hdr);
>  		skb_trim(skb, len);
>  	} else if (vi->mergeable_rx_bufs) {
> -		struct page *page = virt_to_head_page(buf);
> -		skb = page_to_skb(rq, page,
> -				  (char *)buf - (char *)page_address(page),
> -				  len, MERGE_BUFFER_LEN);
> -		if (unlikely(!skb)) {
> -			dev->stats.rx_dropped++;
> -			put_page(page);
> +		skb = receive_mergeable(dev, rq, buf, len);
> +		if (unlikely(!skb))
>  			return;
> -		}
> -		if (receive_mergeable(rq, skb)) {
> -			dev_kfree_skb(skb);
> -			return;
> -		}
>  	} else {
>  		page = buf;
>  		skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);

WARNING: multiple messages have this Message-ID (diff)
From: Jason Wang <jasowang@redhat.com>
To: "Michael S. Tsirkin" <mst@redhat.com>, linux-kernel@vger.kernel.org
Cc: Rusty Russell <rusty@rustcorp.com.au>,
	Michael Dalton <mwdalton@google.com>,
	Eric Dumazet <edumazet@google.com>,
	virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org
Subject: Re: [PATCH 1/2] virtio_net: fix error handling for mergeable buffers
Date: Thu, 28 Nov 2013 11:14:45 +0800	[thread overview]
Message-ID: <5296B525.5060509@redhat.com> (raw)
In-Reply-To: <1385569684-26595-1-git-send-email-mst@redhat.com>

On 11/28/2013 12:31 AM, Michael S. Tsirkin wrote:
> Eric Dumazet noticed that if we encounter an error
> when processing a mergeable buffer, we don't
> dequeue all of the buffers from this packet,
> the result is almost sure to be loss of networking.
>
> Jason Wang noticed that we also leak a page and that we don't decrement
> the rq buf count, so we won't repost buffers (a resource leak).
>
> Fix both issues, and also make the logic a bit more
> robust against device errors by not looping when e.g. because of a leak
> like the one we are fixing here the number of buffers is 0.
>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Michael Dalton <mwdalton@google.com>
> Reported-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>
> Note: this bugfix is needed on stable too, but backport
> might not be trivial.
> I'll send a backport for stable separately.

That will be fine.
>
>  drivers/net/virtio_net.c | 84 ++++++++++++++++++++++++++++++------------------
>  1 file changed, 52 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7bab4de..0e6ea69 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -299,41 +299,53 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
>  	return skb;
>  }
>  
> -static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
> +static struct sk_buff *receive_mergeable(struct net_device *dev,
> +					 struct receive_queue *rq,
> +					 void *buf,
> +					 unsigned int len)
>  {
> -	struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
> +	struct skb_vnet_hdr *hdr = buf;
> +	int num_buf = hdr->mhdr.num_buffers;
> +	struct page *page = virt_to_head_page(buf);
> +	int offset = buf - page_address(page);
> +	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
> +					       MERGE_BUFFER_LEN);
>  	struct sk_buff *curr_skb = head_skb;
> -	char *buf;
> -	struct page *page;
> -	int num_buf, len, offset;
>  
> -	num_buf = hdr->mhdr.num_buffers;
> +	if (unlikely(!curr_skb))
> +		goto err_skb;
> +
>  	while (--num_buf) {
> -		int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
> +		int num_skb_frags;
> +
>  		buf = virtqueue_get_buf(rq->vq, &len);
>  		if (unlikely(!buf)) {
> -			pr_debug("%s: rx error: %d buffers missing\n",
> -				 head_skb->dev->name, hdr->mhdr.num_buffers);
> -			head_skb->dev->stats.rx_length_errors++;
> -			return -EINVAL;
> +			pr_debug("%s: rx error: %d buffers out of %d missing\n",
> +				 dev->name, num_buf, hdr->mhdr.num_buffers);
> +			dev->stats.rx_length_errors++;
> +			goto err_buf;

Not sure it's correct here. Since we break immediately in err_buf if a
buffer is missing, rx_length_errors will be miscounted.

Maybe an ERR_PTR(-EINVAL) is better.
>  		}
>  		if (unlikely(len > MERGE_BUFFER_LEN)) {
>  			pr_debug("%s: rx error: merge buffer too long\n",
> -				 head_skb->dev->name);
> +				 dev->name);
>  			len = MERGE_BUFFER_LEN;
>  		}
> +
> +		page = virt_to_head_page(buf);
> +		--rq->num;
> +
> +		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
>  		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
>  			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
> -			if (unlikely(!nskb)) {
> -				head_skb->dev->stats.rx_dropped++;
> -				return -ENOMEM;
> -			}
> +
> +			if (unlikely(!nskb))
> +				goto err_skb;
>  			if (curr_skb == head_skb)
>  				skb_shinfo(curr_skb)->frag_list = nskb;
>  			else
>  				curr_skb->next = nskb;
> -			curr_skb = nskb;
>  			head_skb->truesize += nskb->truesize;
> +			curr_skb = nskb;

This change seems unnecessary.

Otherwise looks good.
>  			num_skb_frags = 0;
>  		}
>  		if (curr_skb != head_skb) {
> @@ -341,8 +353,7 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
>  			head_skb->len += len;
>  			head_skb->truesize += MERGE_BUFFER_LEN;
>  		}
> -		page = virt_to_head_page(buf);
> -		offset = buf - (char *)page_address(page);
> +		offset = buf - page_address(page);
>  		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
>  			put_page(page);
>  			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
> @@ -351,9 +362,28 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
>  			skb_add_rx_frag(curr_skb, num_skb_frags, page,
>  					offset, len, MERGE_BUFFER_LEN);
>  		}
> +	}
> +
> +	return head_skb;
> +
> +err_skb:
> +	put_page(page);
> +err_buf:
> +	dev->stats.rx_dropped++;
> +	dev_kfree_skb(head_skb);
> +	while (--num_buf) {
> +		buf = virtqueue_get_buf(rq->vq, &len);
> +		if (unlikely(!buf)) {
> +			pr_debug("%s: rx error: %d buffers missing\n",
> +				 dev->name, num_buf);
> +			dev->stats.rx_length_errors++;
> +			break;
> +		}
> +		page = virt_to_head_page(buf);
> +		put_page(page);
>  		--rq->num;
>  	}
> -	return 0;
> +	return NULL;
>  }
>  
>  static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
> @@ -382,19 +412,9 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
>  		len -= sizeof(struct virtio_net_hdr);
>  		skb_trim(skb, len);
>  	} else if (vi->mergeable_rx_bufs) {
> -		struct page *page = virt_to_head_page(buf);
> -		skb = page_to_skb(rq, page,
> -				  (char *)buf - (char *)page_address(page),
> -				  len, MERGE_BUFFER_LEN);
> -		if (unlikely(!skb)) {
> -			dev->stats.rx_dropped++;
> -			put_page(page);
> +		skb = receive_mergeable(dev, rq, buf, len);
> +		if (unlikely(!skb))
>  			return;
> -		}
> -		if (receive_mergeable(rq, skb)) {
> -			dev_kfree_skb(skb);
> -			return;
> -		}
>  	} else {
>  		page = buf;
>  		skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);


  parent reply	other threads:[~2013-11-28  3:14 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-27 16:31 [PATCH 1/2] virtio_net: fix error handling for mergeable buffers Michael S. Tsirkin
2013-11-27 16:31 ` Michael S. Tsirkin
2013-11-27 16:31 ` [PATCH 2/2] virtio-net: make all RX paths handle erors consistently Michael S. Tsirkin
2013-11-27 16:31   ` Michael S. Tsirkin
2013-11-28  6:06   ` Jason Wang
2013-11-28  6:06     ` Jason Wang
2013-11-28  3:14 ` Jason Wang [this message]
2013-11-28  3:14   ` [PATCH 1/2] virtio_net: fix error handling for mergeable buffers Jason Wang
2013-11-28 11:14   ` Michael S. Tsirkin
2013-11-28 11:14     ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5296B525.5060509@redhat.com \
    --to=jasowang@redhat.com \
    --cc=edumazet@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mwdalton@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.