All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Michael Dalton <mwdalton@google.com>
Cc: netdev@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	Eric Dumazet <edumazet@google.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: Re: [PATCH net-next 2/3] virtio-net: use per-receive queue page frag alloc for mergeable bufs
Date: Mon, 23 Dec 2013 15:31:06 +0200	[thread overview]
Message-ID: <20131223133106.GC18168@redhat.com> (raw)
In-Reply-To: <1387239389-13216-2-git-send-email-mwdalton@google.com>

On Mon, Dec 16, 2013 at 04:16:28PM -0800, Michael Dalton wrote:
> The virtio-net driver currently uses netdev_alloc_frag() for GFP_ATOMIC
> mergeable rx buffer allocations. This commit migrates virtio-net to use
> per-receive queue page frags for GFP_ATOMIC allocation. This change unifies
> mergeable rx buffer memory allocation, which now will use
> skb_page_frag_refill() for both atomic and __GFP_WAIT buffer allocations.

OK so just to clarify, this is intended as a cleanup
and preparation for 3/3, not as an optimization?
Some notes below.

> 
> To address fragmentation concerns, if after buffer allocation there
> is too little space left in the page frag to allocate a subsequent
> buffer, the remaining space is added to the current allocated buffer
> so that the remaining space can be used to store packet data.
> 
> Signed-off-by: Michael Dalton <mwdalton@google.com>
> ---
>  drivers/net/virtio_net.c | 69 ++++++++++++++++++++++++++----------------------
>  1 file changed, 38 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index c51a988..d38d130 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -78,6 +78,9 @@ struct receive_queue {
>  	/* Chain pages by the private ptr. */
>  	struct page *pages;
>  
> +	/* Page frag for GFP_ATOMIC packet buffer allocation. */
> +	struct page_frag atomic_frag;
> +
>  	/* RX: fragments + linear part + virtio header */
>  	struct scatterlist sg[MAX_SKB_FRAGS + 2];
>  
> @@ -127,9 +130,9 @@ struct virtnet_info {
>  	struct mutex config_lock;
>  
>  	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
> -	 * low on memory.
> +	 * low on memory. May sleep.
>  	 */
> -	struct page_frag alloc_frag;
> +	struct page_frag sleep_frag;
>  
>  	/* Does the affinity hint is set for virtqueues? */
>  	bool affinity_hint_set;
> @@ -336,8 +339,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	int num_buf = hdr->mhdr.num_buffers;
>  	struct page *page = virt_to_head_page(buf);
>  	int offset = buf - page_address(page);
> -	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
> -					       MERGE_BUFFER_LEN);
> +	int truesize = max_t(int, len, MERGE_BUFFER_LEN);
> +	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
>  	struct sk_buff *curr_skb = head_skb;
>  
>  	if (unlikely(!curr_skb))
> @@ -353,11 +356,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  			dev->stats.rx_length_errors++;
>  			goto err_buf;
>  		}
> -		if (unlikely(len > MERGE_BUFFER_LEN)) {
> -			pr_debug("%s: rx error: merge buffer too long\n",
> -				 dev->name);
> -			len = MERGE_BUFFER_LEN;
> -		}
>  
>  		page = virt_to_head_page(buf);
>  		--rq->num;
> @@ -376,19 +374,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  			head_skb->truesize += nskb->truesize;
>  			num_skb_frags = 0;
>  		}
> +		truesize = max_t(int, len, MERGE_BUFFER_LEN);
>  		if (curr_skb != head_skb) {
>  			head_skb->data_len += len;
>  			head_skb->len += len;
> -			head_skb->truesize += MERGE_BUFFER_LEN;
> +			head_skb->truesize += truesize;
>  		}
>  		offset = buf - page_address(page);
>  		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
>  			put_page(page);
>  			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
> -					     len, MERGE_BUFFER_LEN);
> +					     len, truesize);
>  		} else {
>  			skb_add_rx_frag(curr_skb, num_skb_frags, page,
> -					offset, len, MERGE_BUFFER_LEN);
> +					offset, len, truesize);
>  		}
>  	}
>  
> @@ -579,24 +578,24 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
>  static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
>  {
>  	struct virtnet_info *vi = rq->vq->vdev->priv;
> -	char *buf = NULL;
> -	int err;
> +	struct page_frag *alloc_frag;
> +	char *buf;
> +	int err, len, hole;
>  
> -	if (gfp & __GFP_WAIT) {
> -		if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
> -					 gfp)) {
> -			buf = (char *)page_address(vi->alloc_frag.page) +
> -			      vi->alloc_frag.offset;
> -			get_page(vi->alloc_frag.page);
> -			vi->alloc_frag.offset += MERGE_BUFFER_LEN;
> -		}
> -	} else {
> -		buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
> -	}
> -	if (!buf)
> +	alloc_frag = (gfp & __GFP_WAIT) ? &vi->sleep_frag : &rq->atomic_frag;
> +	if (unlikely(!skb_page_frag_refill(MERGE_BUFFER_LEN, alloc_frag, gfp)))
>  		return -ENOMEM;

I note that netdev_alloc_frag sets __GFP_COLD which kind of
makes sense.

I also note that netdev_alloc_frag uses some tricks to
reduce the number of cache lines dirtied - we need to look
at whether they actually apply here.

> +	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
> +	get_page(alloc_frag->page);
> +	len = MERGE_BUFFER_LEN;
> +	alloc_frag->offset += len;
> +	hole = alloc_frag->size - alloc_frag->offset;
> +	if (hole < MERGE_BUFFER_LEN) {
> +		len += hole;
> +		alloc_frag->offset += hole;
> +	}
>  
> -	sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
> +	sg_init_one(rq->sg, buf, len);
>  	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
>  	if (err < 0)
>  		put_page(virt_to_head_page(buf));
> @@ -1377,6 +1376,16 @@ static void free_receive_bufs(struct virtnet_info *vi)
>  	}
>  }
>  
> +static void free_receive_page_frags(struct virtnet_info *vi)
> +{
> +	int i;
> +	for (i = 0; i < vi->max_queue_pairs; i++)
> +		if (vi->rq[i].atomic_frag.page)
> +			put_page(vi->rq[i].atomic_frag.page);
> +	if (vi->sleep_frag.page)
> +		put_page(vi->sleep_frag.page);
> +}
> +
>  static void free_unused_bufs(struct virtnet_info *vi)
>  {
>  	void *buf;
> @@ -1706,8 +1715,7 @@ free_recv_bufs:
>  free_vqs:
>  	cancel_delayed_work_sync(&vi->refill);
>  	virtnet_del_vqs(vi);
> -	if (vi->alloc_frag.page)
> -		put_page(vi->alloc_frag.page);
> +	free_receive_page_frags(vi);
>  free_stats:
>  	free_percpu(vi->stats);
>  free:
> @@ -1741,8 +1749,7 @@ static void virtnet_remove(struct virtio_device *vdev)
>  	unregister_netdev(vi->dev);
>  
>  	remove_vq_common(vi);
> -	if (vi->alloc_frag.page)
> -		put_page(vi->alloc_frag.page);
> +	free_receive_page_frags(vi);
>  
>  	flush_work(&vi->config_work);
>  
> -- 
> 1.8.5.1

  parent reply	other threads:[~2013-12-23 13:31 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-17  0:16 [PATCH net-next 1/3] net: allow > 0 order atomic page alloc in skb_page_frag_refill Michael Dalton
2013-12-17  0:16 ` [PATCH net-next 2/3] virtio-net: use per-receive queue page frag alloc for mergeable bufs Michael Dalton
2013-12-23  8:12   ` Jason Wang
2013-12-23 17:27     ` Eric Dumazet
2013-12-23 19:37       ` Michael S. Tsirkin
2013-12-26 21:28         ` Michael Dalton
2013-12-26 21:37           ` Michael S. Tsirkin
2013-12-26 22:00             ` Eric Dumazet
2014-01-08 17:21               ` Michael S. Tsirkin
2014-01-08 18:09                 ` Eric Dumazet
2014-01-08 18:57                   ` Michael S. Tsirkin
2014-01-08 19:54                   ` David Miller
2014-01-08 21:16                   ` Rick Jones
2013-12-26 22:00             ` Eric Dumazet
2013-12-26 21:56           ` Eric Dumazet
2013-12-27  4:55             ` Jason Wang
2013-12-27  5:46               ` Eric Dumazet
2013-12-27  6:12                 ` Jason Wang
2013-12-26 21:56           ` Eric Dumazet
2013-12-23 13:31   ` Michael S. Tsirkin [this message]
2013-12-17  0:16 ` [PATCH net-next 3/3] net: auto-tune mergeable rx buffer size for improved performance Michael Dalton
2013-12-17  0:16 ` Michael Dalton
2013-12-23 12:51   ` Michael S. Tsirkin
2013-12-23 13:33   ` Michael S. Tsirkin
2013-12-30 10:14     ` Amos Kong
2014-01-08 17:41       ` Michael S. Tsirkin
2013-12-26  7:33   ` Jason Wang
2013-12-26 20:06     ` Michael Dalton
2013-12-26 20:24       ` Michael S. Tsirkin
2013-12-27  3:04       ` Jason Wang
2013-12-27 21:41         ` Michael Dalton
2013-12-30  4:50           ` Jason Wang
2013-12-30  5:38           ` Jason Wang
2014-01-08 17:37           ` Michael S. Tsirkin
2013-12-19 19:58 ` [PATCH net-next 1/3] net: allow > 0 order atomic page alloc in skb_page_frag_refill David Miller
2013-12-23 13:35   ` Michael S. Tsirkin
2013-12-23  7:52 ` Jason Wang
2013-12-23 17:24   ` Eric Dumazet
2013-12-23 17:24   ` Eric Dumazet
2013-12-23 12:53 ` Michael S. Tsirkin
2013-12-23 17:30   ` Eric Dumazet
2013-12-23 19:19     ` Michael S. Tsirkin
2013-12-24 22:46 ` David Miller
2014-01-03  0:42   ` Debabrata Banerjee
2014-01-03  0:56     ` Eric Dumazet
2014-01-03  1:26       ` Eric Dumazet
2014-01-03  1:59         ` Debabrata Banerjee
2014-01-03 22:47           ` Debabrata Banerjee
2014-01-03 22:54             ` Eric Dumazet
2014-01-03 23:27               ` Debabrata Banerjee
2014-01-03 23:27               ` Debabrata Banerjee
2014-01-03 22:47           ` Debabrata Banerjee
2014-01-03  1:59         ` Debabrata Banerjee
2014-01-03  0:42   ` Debabrata Banerjee
2013-12-24 22:46 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131223133106.GC18168@redhat.com \
    --to=mst@redhat.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=mwdalton@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.