netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Shirley Ma <mashirle@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>,
	Avi Kivity <avi@redhat.com>,
	netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Anthony Liguori <anthony@codemonkey.ws>
Subject: Re: PATCH v2 3/4] Defer skb allocation -- new recvbuf alloc & receive calls
Date: Sun, 13 Dec 2009 13:43:20 +0200	[thread overview]
Message-ID: <20091213114320.GC7074@redhat.com> (raw)
In-Reply-To: <1260535613.30371.24.camel@localhost.localdomain>

On Fri, Dec 11, 2009 at 04:46:53AM -0800, Shirley Ma wrote:
> Signed-off-by: Shirley Ma <xma@us.ibm.com>
> -------------
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 100b4b9..dde8060 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -203,6 +203,73 @@ static struct sk_buff *skb_goodcopy(struct virtnet_info *vi, struct page **page,
>  	return skb;
>  }
>  
> +static struct sk_buff *receive_big(struct virtnet_info *vi, struct page *page,
> +				   unsigned int len)
> +{
> +	struct sk_buff *skb;
> +
> +	skb = skb_goodcopy(vi, &page, &len);
> +	if (unlikely(!skb))
> +		return NULL;
> +
> +	while (len > 0) {
> +		len = skb_set_frag(skb, page, 0, len);
> +		page = (struct page *)page->private;

Interesting. I think skb_goodcopy will sometimes
set *page to NULL. Will the above crash then?

> +	}
> +
> +	if (page)
> +		give_pages(vi, page);
> +
> +	return skb;
> +}
> +
> +static struct sk_buff *receive_mergeable(struct virtnet_info *vi,
> +					 struct page *page, unsigned int len)
> +{
> +	struct sk_buff *skb;
> +	struct skb_vnet_hdr *hdr;
> +	int num_buf, i;
> +
> +	if (len > PAGE_SIZE)
> +		len = PAGE_SIZE;
> +
> +	skb = skb_goodcopy(vi, &page, &len);
> +

Don't put an empty line here: the "if" below is part of the same
logical block as the skb_goodcopy call.

> +	if (unlikely(!skb))
> +		return NULL;

Don't we care that *page might not be NULL here? If not, why not?

> +
> +	hdr = skb_vnet_hdr(skb);
> +	num_buf = hdr->mhdr.num_buffers;
> +	while (--num_buf) {
> +		struct page *page;

Local variable shadows a parameter.
It seems gcc will let you get away with a warning,
but this is not legal C.

> +
> +		i = skb_shinfo(skb)->nr_frags;
> +		if (i >= MAX_SKB_FRAGS) {
> +			pr_debug("%s: packet too long %d\n", skb->dev->name,
> +				 len);


If this happens, we have corrupted memory already.
We do need this check, but please put it before you increment
nr_frags.

> +			skb->dev->stats.rx_length_errors++;
> +			return skb;

This will propagate the error up the stack and corrupt
more memory.

> +		}
> +
> +		page = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
> +		if (!page) {
> +			pr_debug("%s: rx error: %d buffers missing\n",
> +				 skb->dev->name, hdr->mhdr.num_buffers);
> +			skb->dev->stats.rx_length_errors++;
> +			return skb;

Here, skb is some random part of the packet; don't propagate
it up the stack.

> +		}
> +
> +		if (len > PAGE_SIZE)
> +			len = PAGE_SIZE;
> +
> +		skb_set_frag(skb, page, 0, len);
> +
> +		vi->num--;
> +	}
> +
> +	return skb;
> +}
> +
>  static void receive_skb(struct net_device *dev, struct sk_buff *skb,
>  			unsigned len)
>  {
> @@ -356,6 +423,103 @@ drop:
>  	dev_kfree_skb(skb);
>  }
>  
> +static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp, bool *oom)
> +{
> +	struct sk_buff *skb;
> +	struct skb_vnet_hdr *hdr;
> +	struct scatterlist sg[2];
> +	int err = 0;
> +
> +	skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN);
> +	if (unlikely(!skb)) {
> +		*oom = true;
> +		return err;
> +	}
> +
> +	skb_reserve(skb, NET_IP_ALIGN);
> +	skb_put(skb, MAX_PACKET_LEN);
> +
> +	hdr = skb_vnet_hdr(skb);
> +	sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));

sizeof hdr->hdr

> +
> +	skb_to_sgvec(skb, sg+1, 0, skb->len);

space around +

> +
> +	err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 2, skb);
> +	if (err < 0)
> +		kfree_skb(skb);
> +	else
> +		skb_queue_head(&vi->recv, skb);

So why are we queueing this still?

> +
> +	return err;
> +}
> +
> +static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp, bool *oom)
> +{
> +	struct scatterlist sg[2 + MAX_SKB_FRAGS];

MAX_SKB_FRAGS + 2 will be more readable.
Also, create a macro for this constant and document
why +2 makes sense.

> +	int total = MAX_SKB_FRAGS + 2;
> +	char *p;
> +	int err = 0;
> +	int i, offset;
> +	struct page *first = NULL;
> +	struct page *page;
> +	/* share one page between virtio_net header and data */
> +	struct padded_vnet_hdr {
> +		struct virtio_net_hdr hdr;
> +		/* This padding makes our data 16 byte aligned */
> +		char padding[6];

Again, please explain *why* we want 16-byte alignment.
Also this code seems duplicated?
Please put structs at top of file where they
can be found.

> +	};
> +
> +	offset = sizeof(struct padded_vnet_hdr);
> +
> +	for (i = total - 1; i > 0; i--) {

I prefer --i.
Also, total is just a constant.
So simply MAX_SKB_FRAGS + 1 will be clearer.
Why do we scan last to first?
If there's a reason, please add a comment.

> +		page = get_a_page(vi, gfp);
> +		if (!page) {
> +			if (first)
> +				give_pages(vi, first);
> +			*oom = true;
> +			break;
> +		}
> +
> +		p = page_address(page);
> +		page->private = (unsigned long)first;
> +		first = page;
> +
> +		/* allocate MAX_SKB_FRAGS + 1 pages for big packets */
> +		if (i == 1) {
> +			sg_set_buf(&sg[i-1], p, sizeof(struct virtio_net_hdr));

space around - .
All the if (i == 1) handling on exit is really hard to grok.
How about moving common code out of this loop
into a function, and then you can
	for (i = total - 1; i > 1; i--) {
		handle(i);
	}
	handle(1);
	handle(0);
	add_buf
	



> +			sg_set_buf(&sg[i], p + offset, PAGE_SIZE - offset);
> +			err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, total,
> +						       first);
> +			if (err < 0)
> +				give_pages(vi, first);
> +		} else
> +			sg_set_buf(&sg[i], p, PAGE_SIZE);
> +	}
> +
> +	return err;
> +}
> +
> +static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp, bool *oom)


Do we really need *oom here and below?
We can just set err to -ENOMEM, no?

> +{
> +	struct page *page;
> +	struct scatterlist sg;
> +	int err = 0;
> +
> +	page = get_a_page(vi, gfp);
> +	if (!page) {
> +		*oom = true;
> +		return err;

Please do not return 0 on failure.

> +	}
> +
> +	sg_init_one(&sg, page_address(page), PAGE_SIZE);
> +
> +	err = vi->rvq->vq_ops->add_buf(vi->rvq, &sg, 0, 1, page);
> +	if (err < 0)
> +		give_pages(vi, page);
> +
> +	return err;
> +}
> +
>  static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
>  {
>  	struct sk_buff *skb;
> 

  reply	other threads:[~2009-12-13 11:43 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-20  6:09 [PATCH 0/1] Defer skb allocation for both mergeable buffers and big packets in virtio_net Shirley Ma
2009-11-23  0:53 ` Rusty Russell
2009-11-23  8:51   ` Mark McLoughlin
2009-12-08 12:21   ` Michael S. Tsirkin
2009-12-11 12:28     ` [PATCH v2 0/4] " Shirley Ma
2009-12-11 12:33       ` [PATCH v2 1/4] Defer skb allocation -- add destroy buffers function for virtio Shirley Ma
2009-12-13 10:26         ` Michael S. Tsirkin
2009-12-14 20:08           ` Shirley Ma
2009-12-14 20:22             ` Michael S. Tsirkin
2009-12-14 23:22               ` Shirley Ma
2009-12-15 10:57                 ` Michael S. Tsirkin
2009-12-15 22:36               ` Rusty Russell
2009-12-15 22:40                 ` Michael S. Tsirkin
2009-12-16  5:04                   ` Rusty Russell
2009-12-14  3:25         ` Rusty Russell
2009-12-14 22:09           ` Shirley Ma
2009-12-11 12:43       ` [PATCH v2 2/4] Defer skb allocation -- new skb_set calls & chain pages in virtio_net Shirley Ma
2009-12-13 11:24         ` Michael S. Tsirkin
2009-12-14 21:23           ` Shirley Ma
2009-12-15 11:21             ` Michael S. Tsirkin
2009-12-14  6:54         ` Rusty Russell
2009-12-14 22:10           ` Shirley Ma
2009-12-11 12:46       ` PATCH v2 3/4] Defer skb allocation -- new recvbuf alloc & receive calls Shirley Ma
2009-12-13 11:43         ` Michael S. Tsirkin [this message]
2009-12-14 22:08           ` Shirley Ma
2009-12-15  0:37             ` Shirley Ma
2009-12-15 11:33             ` Michael S. Tsirkin
2009-12-15 16:25               ` Shirley Ma
2009-12-15 16:39                 ` Michael S. Tsirkin
2009-12-15 18:42                   ` [RFC PATCH] Subject: virtio: Add unused buffers detach from vring Shirley Ma
2009-12-15 18:47                     ` Michael S. Tsirkin
2009-12-15 19:08                       ` Shirley Ma
2009-12-15 19:14                       ` Shirley Ma
2009-12-15 21:14                         ` Michael S. Tsirkin
2009-12-11 12:49       ` [PATCH v2 4/4] Defer skb allocation -- change allocation & receiving in recv path Shirley Ma
2009-12-13 11:08         ` Michael S. Tsirkin
2009-12-15  8:43           ` Shirley Ma
2009-12-13 10:19       ` [PATCH v2 0/4] Defer skb allocation for both mergeable buffers and big packets in virtio_net Michael S. Tsirkin
2009-12-14 19:59         ` Shirley Ma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091213114320.GC7074@redhat.com \
    --to=mst@redhat.com \
    --cc=anthony@codemonkey.ws \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mashirle@us.ibm.com \
    --cc=netdev@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).