[RFC] avoid unnecessary alignement overhead in skb->data allocation.

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC] avoid unnecessary alignement overhead in skb->data allocation.
@ 2006-08-07  6:01 Evgeniy Polyakov
  2006-08-07  6:23 ` David Miller
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-07  6:01 UTC (permalink / raw)
  To: netdev; +Cc: davem

Hello.

Attached patch allows to avoid unnecessary alignment overhead
in skb->data allocation.
Main idea is to allocate struct skb_shared_info from cache when
addition of sizeof(struct skb_shared_info) ens up in different order
allocation than initial size order.
This allows to solve problem with 4k allocations for 1500 MTU and 32k
allocations for 9k jumbo frames for some chips.
Patch was not tested, so if idea worth it I will complete it.

Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 19c96d4..7474682 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -282,7 +282,8 @@ struct sk_buff {
 				nfctinfo:3;
 	__u8			pkt_type:3,
 				fclone:2,
-				ipvs_property:1;
+				ipvs_property:1,
+				shinfo_cache:1;
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
@@ -403,7 +404,9 @@ extern unsigned int   skb_find_text(stru
 				    struct ts_state *state);
 
 /* Internal */
-#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
+#define skb_shinfo(SKB)		((SKB)->shinfo_cache?\
+		(struct skb_shared_info *)(*((SKB)->end)):\
+		((struct skb_shared_info *)((SKB)->end)))
 
 /**
  *	skb_queue_empty - check if a queue is empty
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 022d889..7287814 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@ #include <asm/system.h>
 
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+static kmem_cache_t *skbuff_shared_info_cache __read_mostly;
 
 /*
  *	Keep out-of-line to prevent kernel bloat.
@@ -146,6 +147,8 @@ struct sk_buff *__alloc_skb(unsigned int
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	int order = get_order(size + sizeof(void *));
+	struct skb_shared_info *sh;
 
 	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 
@@ -156,11 +159,28 @@ struct sk_buff *__alloc_skb(unsigned int
 
 	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
-	data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-	if (!data)
-		goto nodata;
+	if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) {
+		data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+		if (!data)
+			goto nodata;
+		memset(skb, 0, offsetof(struct sk_buff, truesize));
+	} else {
+		unsigned long *ptr;
+
+		data = ____kmalloc(size, gfp_mask);
+		if (!data)
+			goto nodata;
+		sh = kmem_cache_alloc(skbuff_shared_info_cache, gfp_mask);
+		if (!sh) {
+			kfree(data);
+			goto nodata;
+		}
+		memset(skb, 0, offsetof(struct sk_buff, truesize));
+		skb->shinfo_cache = 1;
+		ptr = data;
+		ptr[size] = sh;
+	}
 
-	memset(skb, 0, offsetof(struct sk_buff, truesize));
 	skb->truesize = size + sizeof(struct sk_buff);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
@@ -314,6 +334,8 @@ static void skb_release_data(struct sk_b
 			skb_drop_fraglist(skb);
 
 		kfree(skb->head);
+		if (skb->shinfo_cache)
+			kmem_cache_free(skbuff_shared_info_cache, *(skb->end));
 	}
 }
 
@@ -500,6 +522,7 @@ #endif
 	C(data);
 	C(tail);
 	C(end);
+	C(shinfo_cache);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
@@ -2057,6 +2080,14 @@ void __init skb_init(void)
 						NULL, NULL);
 	if (!skbuff_fclone_cache)
 		panic("cannot create skbuff cache");
+	
+	skbuff_shared_info_cache = kmem_cache_create("skbuff_shared_info_cache",
+						sizeof(struct sbk_shared_info),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL, NULL);
+	if (!skbuff_shared_info_cache)
+		panic("cannot create skbuff shared info cache");
 }
 
 EXPORT_SYMBOL(___pskb_trim);


-- 
	Evgeniy Polyakov

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:01 [RFC] avoid unnecessary alignement overhead in skb->data allocation Evgeniy Polyakov
@ 2006-08-07  6:23 ` David Miller
  2006-08-07  6:30   ` Evgeniy Polyakov
  2006-08-07  7:17   ` Herbert Xu
  2006-08-07  6:29 ` Herbert Xu
  2006-08-07  8:05 ` Eric Dumazet
  2 siblings, 2 replies; 18+ messages in thread
From: David Miller @ 2006-08-07  6:23 UTC (permalink / raw)
  To: johnpol; +Cc: netdev

From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Mon, 7 Aug 2006 10:01:56 +0400

> +	int order = get_order(size + sizeof(void *));
> +	struct skb_shared_info *sh;
>  
>  	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
>  
> @@ -156,11 +159,28 @@ struct sk_buff *__alloc_skb(unsigned int
>  
>  	/* Get the DATA. Size must match skb_add_mtu(). */
>  	size = SKB_DATA_ALIGN(size);
> -	data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
> -	if (!data)
> -		goto nodata;
> +	if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) {

get_order() returns a PAGE_SIZE order not a byte one.  So this test
here at the end is incorrect.  It should probably be something
like "if ((PAGE_SIZE << order) > ..."

I don't know if I want to eat an entire extra allocation for every SKB
just to handle broken e1000 cards that can't be bothered to support
non-power-of-2 receive buffer sizes and a proper MTU setting.

I guess we might have to, but this is extremely unfortunate. :-/


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:23 ` David Miller
@ 2006-08-07  6:30   ` Evgeniy Polyakov
  2006-08-07  7:17   ` Herbert Xu
  1 sibling, 0 replies; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-07  6:30 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sun, Aug 06, 2006 at 11:23:39PM -0700, David Miller (davem@davemloft.net) wrote:
> > +	if ((1UL << order) > size + sizeof(void *) + sizeof(struct skb_shared_info)) {
> 
> get_order() returns a PAGE_SIZE order not a byte one.  So this test
> here at the end is incorrect.  It should probably be something
> like "if ((PAGE_SIZE << order) > ..."
> 
> I don't know if I want to eat an entire extra allocation for every SKB
> just to handle broken e1000 cards that can't be bothered to support
> non-power-of-2 receive buffer sizes and a proper MTU setting.
> 
> I guess we might have to, but this is extremely unfortunate. :-/

I have even better idea - create alloc_skb_aligned() for those who
knows in advance, that it's size is always aligned to power of 2, so
additional skb_shared_info will 100% require higher order allocation.
Then e1000 can use that instead of usual alloc_skb().

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:23 ` David Miller
  2006-08-07  6:30   ` Evgeniy Polyakov
@ 2006-08-07  7:17   ` Herbert Xu
  2006-08-07  7:24     ` Evgeniy Polyakov
  1 sibling, 1 reply; 18+ messages in thread
From: Herbert Xu @ 2006-08-07  7:17 UTC (permalink / raw)
  To: David Miller; +Cc: johnpol, netdev

David Miller <davem@davemloft.net> wrote:
> 
> I don't know if I want to eat an entire extra allocation for every SKB
> just to handle broken e1000 cards that can't be bothered to support
> non-power-of-2 receive buffer sizes and a proper MTU setting.
> 
> I guess we might have to, but this is extremely unfortunate. :-/

I'd hope not.  Apparently they are capable of putting data into
individual pages and chaining them together.  The only problem
is that half a page is wasted for 1500-byte packets.

However, allocating 16KB packets would waste even more memory
if only 1500 bytes end up getting used.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  7:17   ` Herbert Xu
@ 2006-08-07  7:24     ` Evgeniy Polyakov
  2006-08-07  7:28       ` Herbert Xu
  0 siblings, 1 reply; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-07  7:24 UTC (permalink / raw)
  To: Herbert Xu; +Cc: David Miller, netdev

On Mon, Aug 07, 2006 at 05:17:13PM +1000, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> David Miller <davem@davemloft.net> wrote:
> > 
> > I don't know if I want to eat an entire extra allocation for every SKB
> > just to handle broken e1000 cards that can't be bothered to support
> > non-power-of-2 receive buffer sizes and a proper MTU setting.
> > 
> > I guess we might have to, but this is extremely unfortunate. :-/
> 
> I'd hope not.  Apparently they are capable of putting data into
> individual pages and chaining them together.  The only problem

Unfortunately not all chips are capable to do this.

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  7:24     ` Evgeniy Polyakov
@ 2006-08-07  7:28       ` Herbert Xu
  2006-08-07  7:31         ` Evgeniy Polyakov
  0 siblings, 1 reply; 18+ messages in thread
From: Herbert Xu @ 2006-08-07  7:28 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: David Miller, netdev, Jesse Brandeburg

On Mon, Aug 07, 2006 at 11:24:23AM +0400, Evgeniy Polyakov wrote:
>
> > I'd hope not.  Apparently they are capable of putting data into
> > individual pages and chaining them together.  The only problem
> 
> Unfortunately not all chips are capable to do this.

No not all chips are capable of header-splitting.  However, from what
Jesse was saying it sounded as if all (or most?) chips are capable of
storing data cross pages.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  7:28       ` Herbert Xu
@ 2006-08-07  7:31         ` Evgeniy Polyakov
  2006-08-07  7:39           ` Herbert Xu
  0 siblings, 1 reply; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-07  7:31 UTC (permalink / raw)
  To: Herbert Xu; +Cc: David Miller, netdev, Jesse Brandeburg

On Mon, Aug 07, 2006 at 05:28:16PM +1000, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> On Mon, Aug 07, 2006 at 11:24:23AM +0400, Evgeniy Polyakov wrote:
> >
> > > I'd hope not.  Apparently they are capable of putting data into
> > > individual pages and chaining them together.  The only problem
> > 
> > Unfortunately not all chips are capable to do this.
> 
> No not all chips are capable of header-splitting.  However, from what
> Jesse was saying it sounded as if all (or most?) chips are capable of
> storing data cross pages.

Only if they form contiguous region?
Jesse, is it possible for every e1000 chip to split frame into several
page-sized chunks i.e. create some kind of receiving scatter-gather?

> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  7:31         ` Evgeniy Polyakov
@ 2006-08-07  7:39           ` Herbert Xu
  2006-08-08  0:09             ` Jesse Brandeburg
  0 siblings, 1 reply; 18+ messages in thread
From: Herbert Xu @ 2006-08-07  7:39 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: David Miller, netdev, Jesse Brandeburg, chris.leech

On Mon, Aug 07, 2006 at 11:31:03AM +0400, Evgeniy Polyakov wrote:
> 
> Only if they form contiguous region?
> Jesse, is it possible for every e1000 chip to split frame into several
> page-sized chunks i.e. create some kind of receiving scatter-gather?

Actually, it was Chris Leech who raised this possibility:

: Yes, e1000 devices will spill over and use multiple buffers for a
: single frame.  We've been trying to find a good way to use multiple
: buffers to take care of these allocation problems.  The structure of
: the sk_buff does not make it easy.  Or should I say that it's the
: limitation that drivers are not allowed to chain together multiple
: sk_buffs to represent a single frame that does not make it easy.

Perhaps he can enlighten us.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  7:39           ` Herbert Xu
@ 2006-08-08  0:09             ` Jesse Brandeburg
  2006-08-08  0:41               ` David Miller
  2006-08-08  5:24               ` Evgeniy Polyakov
  0 siblings, 2 replies; 18+ messages in thread
From: Jesse Brandeburg @ 2006-08-08  0:09 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Evgeniy Polyakov, David Miller, netdev, chris.leech,
	Brandeburg, Jesse

On 8/7/06, Herbert Xu <herbert@gondor.apana.org.au> wrote:
> On Mon, Aug 07, 2006 at 11:31:03AM +0400, Evgeniy Polyakov wrote:
> >
> > Only if they form contiguous region?
> > Jesse, is it possible for every e1000 chip to split frame into several
> > page-sized chunks i.e. create some kind of receiving scatter-gather?

now you get to the meat of the problem.  Yes, all versions of e1000
can receive packets longer than the receive data area in the
descriptor.  If the data area is shorter than the packet, then the
data over flows into the next descriptor.

>
> Actually, it was Chris Leech who raised this possibility:
>
> : Yes, e1000 devices will spill over and use multiple buffers for a
> : single frame.  We've been trying to find a good way to use multiple
> : buffers to take care of these allocation problems.  The structure of
> : the sk_buff does not make it easy.  Or should I say that it's the
> : limitation that drivers are not allowed to chain together multiple
> : sk_buffs to represent a single frame that does not make it easy.
>
> Perhaps he can enlighten us.

Or since i'm here... in any case we had driver code (see driver
6.2.15) that did this at one point, but we removed it because it was
using frag_list

So here is our problem with the network driver API.
the only way to indicate multiple buffer (descriptor) receives is to
use nr_frags.  Our non split-header hardware needs power of 2
allocations *except* in the 1500 byte MTU case where we can optimize
by having the hardware drop all frames > 1522 bytes

we would like to have a method to use alloc_skb to get packets from
slab to receive into and then chain them together.  Right now that is
not possible because you can't map alloc_skb'd data areas directly to
pages to put into nr_frags.

much of this comes from the requirement that the stack free the skb we
allocated.  if we had an async callback for the driver to take care of
freeing the skb then we could
a) recycle
b) handle pages in some efficient manner.

also, eth_type_trans wants skb->data to point to header, which would
require us to memcpy data from a page back to skb->data.

We could use help to get this done and mutiple drivers would benefit.
I can't get it done by myself, as much as I would like to.

As for Evgeniy's suggestion of using the end of the e1000 receive
buffer to store something I think it is a bad idea. Our hardware deals
with powers of 2.  From the e1000 manual:
 =====
LPE controls whether long packet reception is permitted. Hardware
discards long packets if LPE is 0. A long packet is one longer than
1522 bytes. If LPE is 1, the maximum packet size that the device can
receive is 16384 bytes.
 =====

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-08  0:09             ` Jesse Brandeburg
@ 2006-08-08  0:41               ` David Miller
  2006-08-08  5:24               ` Evgeniy Polyakov
  1 sibling, 0 replies; 18+ messages in thread
From: David Miller @ 2006-08-08  0:41 UTC (permalink / raw)
  To: jesse.brandeburg; +Cc: herbert, johnpol, netdev, chris.leech, jesse.brandeburg

From: "Jesse Brandeburg" <jesse.brandeburg@gmail.com>
Date: Mon, 7 Aug 2006 17:09:39 -0700

> also, eth_type_trans wants skb->data to point to header, which would
> require us to memcpy data from a page back to skb->data.

You merely would need to call pskb_may_pull() right before invoking
eth_type_trans.  Meeting that functions expectations with a non-linear
SKB is not brain surgery. :-)


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-08  0:09             ` Jesse Brandeburg
  2006-08-08  0:41               ` David Miller
@ 2006-08-08  5:24               ` Evgeniy Polyakov
  2006-08-08  5:41                 ` Herbert Xu
  1 sibling, 1 reply; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-08  5:24 UTC (permalink / raw)
  To: Jesse Brandeburg
  Cc: Herbert Xu, David Miller, netdev, chris.leech, Brandeburg, Jesse

On Mon, Aug 07, 2006 at 05:09:39PM -0700, Jesse Brandeburg (jesse.brandeburg@gmail.com) wrote:
> LPE controls whether long packet reception is permitted. Hardware
> discards long packets if LPE is 0. A long packet is one longer than
> 1522 bytes. If LPE is 1, the maximum packet size that the device can
> receive is 16384 bytes.

So there is no place at the end of skb for additional pointer.
And new question arises - until what Jesse suggested is implemented in
some way, do we need to store a pointer to shared info inside skb and
allocate it from cache in case it does no fit into aligned buffer (in
case of e1000 it happens all the time exept 1500 MTU)?
David, Herbert?

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-08  5:24               ` Evgeniy Polyakov
@ 2006-08-08  5:41                 ` Herbert Xu
  2006-08-08  5:55                   ` Evgeniy Polyakov
  0 siblings, 1 reply; 18+ messages in thread
From: Herbert Xu @ 2006-08-08  5:41 UTC (permalink / raw)
  To: Evgeniy Polyakov
  Cc: Jesse Brandeburg, David Miller, netdev, chris.leech,
	Brandeburg, Jesse

On Tue, Aug 08, 2006 at 09:24:08AM +0400, Evgeniy Polyakov wrote:
> 
> So there is no place at the end of skb for additional pointer.
> And new question arises - until what Jesse suggested is implemented in
> some way, do we need to store a pointer to shared info inside skb and
> allocate it from cache in case it does no fit into aligned buffer (in
> case of e1000 it happens all the time exept 1500 MTU)?
> David, Herbert?

I'm not sure whether this is really worth it.  After all, the only
order of allocation that's really likely to succeed is 0.  So going
from order 3 to order 2 probably doesn't make that big a difference.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-08  5:41                 ` Herbert Xu
@ 2006-08-08  5:55                   ` Evgeniy Polyakov
  0 siblings, 0 replies; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-08  5:55 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Jesse Brandeburg, David Miller, netdev, chris.leech,
	Brandeburg, Jesse

On Tue, Aug 08, 2006 at 03:41:15PM +1000, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> On Tue, Aug 08, 2006 at 09:24:08AM +0400, Evgeniy Polyakov wrote:
> > 
> > So there is no place at the end of skb for additional pointer.
> > And new question arises - until what Jesse suggested is implemented in
> > some way, do we need to store a pointer to shared info inside skb and
> > allocate it from cache in case it does no fit into aligned buffer (in
> > case of e1000 it happens all the time exept 1500 MTU)?
> > David, Herbert?
> 
> I'm not sure whether this is really worth it.  After all, the only
> order of allocation that's really likely to succeed is 0.  So going
> from order 3 to order 2 probably doesn't make that big a difference.

16k is quite big overhead and it is much more possible to succeed than
32k, but in general, yes, only order 0 can succeed.
So right now we can mark e1000 and other cards, which do not support
frag_list generation as not supporting jumbo frames?

> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:01 [RFC] avoid unnecessary alignement overhead in skb->data allocation Evgeniy Polyakov
  2006-08-07  6:23 ` David Miller
@ 2006-08-07  6:29 ` Herbert Xu
  2006-08-07  6:36   ` Evgeniy Polyakov
  2006-08-07  8:05 ` Eric Dumazet
  2 siblings, 1 reply; 18+ messages in thread
From: Herbert Xu @ 2006-08-07  6:29 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: netdev, davem

Evgeniy Polyakov <johnpol@2ka.mipt.ru> wrote:
> 
> Attached patch allows to avoid unnecessary alignment overhead
> in skb->data allocation.
> Main idea is to allocate struct skb_shared_info from cache when
> addition of sizeof(struct skb_shared_info) ens up in different order
> allocation than initial size order.
> This allows to solve problem with 4k allocations for 1500 MTU and 32k
> allocations for 9k jumbo frames for some chips.
> Patch was not tested, so if idea worth it I will complete it.

I thought the Intel guys were saying that their NIC could write the
full 16KB which means it it's unsafe to use the last four bytes, no?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:29 ` Herbert Xu
@ 2006-08-07  6:36   ` Evgeniy Polyakov
  2006-08-07  6:42     ` Herbert Xu
  0 siblings, 1 reply; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-07  6:36 UTC (permalink / raw)
  To: Herbert Xu; +Cc: netdev, davem

On Mon, Aug 07, 2006 at 04:29:09PM +1000, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> Evgeniy Polyakov <johnpol@2ka.mipt.ru> wrote:
> > 
> > Attached patch allows to avoid unnecessary alignment overhead
> > in skb->data allocation.
> > Main idea is to allocate struct skb_shared_info from cache when
> > addition of sizeof(struct skb_shared_info) ens up in different order
> > allocation than initial size order.
> > This allows to solve problem with 4k allocations for 1500 MTU and 32k
> > allocations for 9k jumbo frames for some chips.
> > Patch was not tested, so if idea worth it I will complete it.
> 
> I thought the Intel guys were saying that their NIC could write the
> full 16KB which means it it's unsafe to use the last four bytes, no?

Well, theirs comments in code say, that maximum allowed frame size is
0x3f00, so there is a little place at the end to put there a pointer,
so I allocate size + sizeof(void *).
If they actually can eat all 16k, then we need a pointer somewhere in
the skb for shared_info, since 16k + sizeof(void *) will be aligned to
32k.

> Cheers,
 -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:36   ` Evgeniy Polyakov
@ 2006-08-07  6:42     ` Herbert Xu
  0 siblings, 0 replies; 18+ messages in thread
From: Herbert Xu @ 2006-08-07  6:42 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: netdev, davem

On Mon, Aug 07, 2006 at 10:36:36AM +0400, Evgeniy Polyakov wrote:
> 
> Well, theirs comments in code say, that maximum allowed frame size is
> 0x3f00, so there is a little place at the end to put there a pointer,
> so I allocate size + sizeof(void *).
> If they actually can eat all 16k, then we need a pointer somewhere in
> the skb for shared_info, since 16k + sizeof(void *) will be aligned to
> 32k.

It would be good to get a definitive statement from them before we go
down this track.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  6:01 [RFC] avoid unnecessary alignement overhead in skb->data allocation Evgeniy Polyakov
  2006-08-07  6:23 ` David Miller
  2006-08-07  6:29 ` Herbert Xu
@ 2006-08-07  8:05 ` Eric Dumazet
  2006-08-07  8:14   ` Evgeniy Polyakov
  2 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2006-08-07  8:05 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: netdev, davem

On Monday 07 August 2006 08:01, Evgeniy Polyakov wrote:
> Hello.
>
> Attached patch allows to avoid unnecessary alignment overhead
> in skb->data allocation.
> Main idea is to allocate struct skb_shared_info from cache when
> addition of sizeof(struct skb_shared_info) ens up in different order
> allocation than initial size order.
> This allows to solve problem with 4k allocations for 1500 MTU and 32k
> allocations for 9k jumbo frames for some chips.
> Patch was not tested, so if idea worth it I will complete it.
>
> Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
>

> +	if ((1UL << order) > size + sizeof(void *) + sizeof(struct
> skb_shared_info)) { +		data = ____kmalloc(size + sizeof(struct
> skb_shared_info), gfp_mask); +		if (!data)
> +			goto nodata;
> +		memset(skb, 0, offsetof(struct sk_buff, truesize));
> +	} else {
> +		unsigned long *ptr;
> +
> +		data = ____kmalloc(size, gfp_mask);

You certainly want to kmalloc(size + sizeof(void *)) here, dont you ?

> +		if (!data)
> +			goto nodata;
> +		sh = kmem_cache_alloc(skbuff_shared_info_cache, gfp_mask);
> +		if (!sh) {
> +			kfree(data);
> +			goto nodata;
> +		}
> +		memset(skb, 0, offsetof(struct sk_buff, truesize));
> +		skb->shinfo_cache = 1;
> +		ptr = data;
> +		ptr[size] = sh;

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC] avoid unnecessary alignement overhead in skb->data allocation.
  2006-08-07  8:05 ` Eric Dumazet
@ 2006-08-07  8:14   ` Evgeniy Polyakov
  0 siblings, 0 replies; 18+ messages in thread
From: Evgeniy Polyakov @ 2006-08-07  8:14 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem

On Mon, Aug 07, 2006 at 10:05:57AM +0200, Eric Dumazet (dada1@cosmosbay.com) wrote:
> > +	if ((1UL << order) > size + sizeof(void *) + sizeof(struct
> > skb_shared_info)) { +		data = ____kmalloc(size + sizeof(struct
> > skb_shared_info), gfp_mask); +		if (!data)
> > +			goto nodata;
> > +		memset(skb, 0, offsetof(struct sk_buff, truesize));
> > +	} else {
> > +		unsigned long *ptr;
> > +
> > +		data = ____kmalloc(size, gfp_mask);
> 
> You certainly want to kmalloc(size + sizeof(void *)) here, dont you ?

Yep.
I think in next iteration of this patch I will add additional argument
which will present order of aligned size (to eliminate get_order() loop
for those who know it in advance like e1000). In case there are no
place even for sizeof(void *) (what happens with e1000) and allocation 
order is quite high (more than half of the page), then additional field 
in skb can be used (or we can reuse it unconditionally to store pointer
to shared info if skb is being allocated through alloc_skb_aligned()
function).

> Eric

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2006-08-08  5:55 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-07  6:01 [RFC] avoid unnecessary alignement overhead in skb->data allocation Evgeniy Polyakov
2006-08-07  6:23 ` David Miller
2006-08-07  6:30   ` Evgeniy Polyakov
2006-08-07  7:17   ` Herbert Xu
2006-08-07  7:24     ` Evgeniy Polyakov
2006-08-07  7:28       ` Herbert Xu
2006-08-07  7:31         ` Evgeniy Polyakov
2006-08-07  7:39           ` Herbert Xu
2006-08-08  0:09             ` Jesse Brandeburg
2006-08-08  0:41               ` David Miller
2006-08-08  5:24               ` Evgeniy Polyakov
2006-08-08  5:41                 ` Herbert Xu
2006-08-08  5:55                   ` Evgeniy Polyakov
2006-08-07  6:29 ` Herbert Xu
2006-08-07  6:36   ` Evgeniy Polyakov
2006-08-07  6:42     ` Herbert Xu
2006-08-07  8:05 ` Eric Dumazet
2006-08-07  8:14   ` Evgeniy Polyakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).