Re: [PATCH net-next 2/3] virtio-net: use per-receive queue page frag alloc for mergeable bufs

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: "Michael S. Tsirkin" <mst@redhat.com>
To: Michael Dalton <mwdalton@google.com>
Cc: netdev@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	Eric Dumazet <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: Re: [PATCH net-next 2/3] virtio-net: use per-receive queue page frag alloc for mergeable bufs
Date: Mon, 23 Dec 2013 15:31:06 +0200	[thread overview]
Message-ID: <20131223133106.GC18168@redhat.com> (raw)
In-Reply-To: <1387239389-13216-2-git-send-email-mwdalton@google.com>

On Mon, Dec 16, 2013 at 04:16:28PM -0800, Michael Dalton wrote:
> The virtio-net driver currently uses netdev_alloc_frag() for GFP_ATOMIC
> mergeable rx buffer allocations. This commit migrates virtio-net to use
> per-receive queue page frags for GFP_ATOMIC allocation. This change unifies
> mergeable rx buffer memory allocation, which now will use
> skb_page_frag_refill() for both atomic and __GFP_WAIT buffer allocations.

OK so just to clarify, this is intended as a cleanup
and preparation for 3/3, not as an optimization?
Some notes below.

> 
> To address fragmentation concerns, if after buffer allocation there
> is too little space left in the page frag to allocate a subsequent
> buffer, the remaining space is added to the current allocated buffer
> so that the remaining space can be used to store packet data.
> 
> Signed-off-by: Michael Dalton <mwdalton@google.com>
> ---
>  drivers/net/virtio_net.c | 69 ++++++++++++++++++++++++++----------------------
>  1 file changed, 38 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index c51a988..d38d130 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -78,6 +78,9 @@ struct receive_queue {
>  	/* Chain pages by the private ptr. */
>  	struct page *pages;
>  
> +	/* Page frag for GFP_ATOMIC packet buffer allocation. */
> +	struct page_frag atomic_frag;
> +
>  	/* RX: fragments + linear part + virtio header */
>  	struct scatterlist sg[MAX_SKB_FRAGS + 2];
>  
> @@ -127,9 +130,9 @@ struct virtnet_info {
>  	struct mutex config_lock;
>  
>  	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
> -	 * low on memory.
> +	 * low on memory. May sleep.
>  	 */
> -	struct page_frag alloc_frag;
> +	struct page_frag sleep_frag;
>  
>  	/* Does the affinity hint is set for virtqueues? */
>  	bool affinity_hint_set;
> @@ -336,8 +339,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	int num_buf = hdr->mhdr.num_buffers;
>  	struct page *page = virt_to_head_page(buf);
>  	int offset = buf - page_address(page);
> -	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
> -					       MERGE_BUFFER_LEN);
> +	int truesize = max_t(int, len, MERGE_BUFFER_LEN);
> +	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
>  	struct sk_buff *curr_skb = head_skb;
>  
>  	if (unlikely(!curr_skb))
> @@ -353,11 +356,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  			dev->stats.rx_length_errors++;
>  			goto err_buf;
>  		}
> -		if (unlikely(len > MERGE_BUFFER_LEN)) {
> -			pr_debug("%s: rx error: merge buffer too long\n",
> -				 dev->name);
> -			len = MERGE_BUFFER_LEN;
> -		}
>  
>  		page = virt_to_head_page(buf);
>  		--rq->num;
> @@ -376,19 +374,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  			head_skb->truesize += nskb->truesize;
>  			num_skb_frags = 0;
>  		}
> +		truesize = max_t(int, len, MERGE_BUFFER_LEN);
>  		if (curr_skb != head_skb) {
>  			head_skb->data_len += len;
>  			head_skb->len += len;
> -			head_skb->truesize += MERGE_BUFFER_LEN;
> +			head_skb->truesize += truesize;
>  		}
>  		offset = buf - page_address(page);
>  		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
>  			put_page(page);
>  			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
> -					     len, MERGE_BUFFER_LEN);
> +					     len, truesize);
>  		} else {
>  			skb_add_rx_frag(curr_skb, num_skb_frags, page,
> -					offset, len, MERGE_BUFFER_LEN);
> +					offset, len, truesize);
>  		}
>  	}
>  
> @@ -579,24 +578,24 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
>  static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
>  {
>  	struct virtnet_info *vi = rq->vq->vdev->priv;
> -	char *buf = NULL;
> -	int err;
> +	struct page_frag *alloc_frag;
> +	char *buf;
> +	int err, len, hole;
>  
> -	if (gfp & __GFP_WAIT) {
> -		if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
> -					 gfp)) {
> -			buf = (char *)page_address(vi->alloc_frag.page) +
> -			      vi->alloc_frag.offset;
> -			get_page(vi->alloc_frag.page);
> -			vi->alloc_frag.offset += MERGE_BUFFER_LEN;
> -		}
> -	} else {
> -		buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
> -	}
> -	if (!buf)
> +	alloc_frag = (gfp & __GFP_WAIT) ? &vi->sleep_frag : &rq->atomic_frag;
> +	if (unlikely(!skb_page_frag_refill(MERGE_BUFFER_LEN, alloc_frag, gfp)))
>  		return -ENOMEM;

I note that netdev_alloc_frag sets __GFP_COLD which kind of
makes sense.

I also note that netdev_alloc_frag uses some tricks to
reduce the number of cache lines dirtied - need to look
at whether they actually apply here.

> +	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
> +	get_page(alloc_frag->page);
> +	len = MERGE_BUFFER_LEN;
> +	alloc_frag->offset += len;
> +	hole = alloc_frag->size - alloc_frag->offset;
> +	if (hole < MERGE_BUFFER_LEN) {
> +		len += hole;
> +		alloc_frag->offset += hole;
> +	}
>  
> -	sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
> +	sg_init_one(rq->sg, buf, len);
>  	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
>  	if (err < 0)
>  		put_page(virt_to_head_page(buf));
> @@ -1377,6 +1376,16 @@ static void free_receive_bufs(struct virtnet_info *vi)
>  	}
>  }
>  
> +static void free_receive_page_frags(struct virtnet_info *vi)
> +{
> +	int i;
> +	for (i = 0; i < vi->max_queue_pairs; i++)
> +		if (vi->rq[i].atomic_frag.page)
> +			put_page(vi->rq[i].atomic_frag.page);
> +	if (vi->sleep_frag.page)
> +		put_page(vi->sleep_frag.page);
> +}
> +
>  static void free_unused_bufs(struct virtnet_info *vi)
>  {
>  	void *buf;
> @@ -1706,8 +1715,7 @@ free_recv_bufs:
>  free_vqs:
>  	cancel_delayed_work_sync(&vi->refill);
>  	virtnet_del_vqs(vi);
> -	if (vi->alloc_frag.page)
> -		put_page(vi->alloc_frag.page);
> +	free_receive_page_frags(vi);
>  free_stats:
>  	free_percpu(vi->stats);
>  free:
> @@ -1741,8 +1749,7 @@ static void virtnet_remove(struct virtio_device *vdev)
>  	unregister_netdev(vi->dev);
>  
>  	remove_vq_common(vi);
> -	if (vi->alloc_frag.page)
> -		put_page(vi->alloc_frag.page);
> +	free_receive_page_frags(vi);
>  
>  	flush_work(&vi->config_work);
>  
> -- 
> 1.8.5.1

next prev parent reply	other threads:[~2013-12-23 13:31 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-17  0:16 [PATCH net-next 1/3] net: allow > 0 order atomic page alloc in skb_page_frag_refill Michael Dalton
2013-12-17  0:16 ` [PATCH net-next 2/3] virtio-net: use per-receive queue page frag alloc for mergeable bufs Michael Dalton
2013-12-23  8:12   ` Jason Wang
2013-12-23 17:27     ` Eric Dumazet
2013-12-23 19:37       ` Michael S. Tsirkin
2013-12-26 21:28         ` Michael Dalton
2013-12-26 21:37           ` Michael S. Tsirkin
2013-12-26 22:00             ` Eric Dumazet
2014-01-08 17:21               ` Michael S. Tsirkin
2014-01-08 18:09                 ` Eric Dumazet
2014-01-08 18:57                   ` Michael S. Tsirkin
2014-01-08 19:54                   ` David Miller
2014-01-08 21:16                   ` Rick Jones
2013-12-26 21:56           ` Eric Dumazet
2013-12-27  4:55             ` Jason Wang
2013-12-27  5:46               ` Eric Dumazet
2013-12-27  6:12                 ` Jason Wang
2013-12-23 13:31   ` Michael S. Tsirkin [this message]
2013-12-17  0:16 ` [PATCH net-next 3/3] net: auto-tune mergeable rx buffer size for improved performance Michael Dalton
2013-12-23 12:51   ` Michael S. Tsirkin
2013-12-23 13:33   ` Michael S. Tsirkin
2013-12-30 10:14     ` Amos Kong
2014-01-08 17:41       ` Michael S. Tsirkin
2013-12-26  7:33   ` Jason Wang
2013-12-26 20:06     ` Michael Dalton
2013-12-26 20:24       ` Michael S. Tsirkin
2013-12-27  3:04       ` Jason Wang
2013-12-27 21:41         ` Michael Dalton
2013-12-30  4:50           ` Jason Wang
2013-12-30  5:38           ` Jason Wang
2014-01-08 17:37           ` Michael S. Tsirkin
2013-12-19 19:58 ` [PATCH net-next 1/3] net: allow > 0 order atomic page alloc in skb_page_frag_refill David Miller
2013-12-23 13:35   ` Michael S. Tsirkin
2013-12-23  7:52 ` Jason Wang
2013-12-23 17:24   ` Eric Dumazet
2013-12-23 12:53 ` Michael S. Tsirkin
2013-12-23 17:30   ` Eric Dumazet
2013-12-23 19:19     ` Michael S. Tsirkin
2013-12-24 22:46 ` David Miller
2014-01-03  0:42   ` Debabrata Banerjee
2014-01-03  0:56     ` Eric Dumazet
2014-01-03  1:26       ` Eric Dumazet
2014-01-03  1:59         ` Debabrata Banerjee
2014-01-03 22:47           ` Debabrata Banerjee
2014-01-03 22:54             ` Eric Dumazet
2014-01-03 23:27               ` Debabrata Banerjee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131223133106.GC18168@redhat.com \
    --to=mst@redhat.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=mwdalton@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).