* [PATCH net-next] net: more accurate skb truesize
From: Eric Dumazet @ 2011-10-13 15:26 UTC
To: David Miller; +Cc: netdev, Andi Kleen
skb truesize currently accounts for sk_buff struct and part of skb head.
Considering that skb_shared_info is larger than sk_buff, it's time to
take it into account for better memory accounting.
This patch introduces SKB_TRUESIZE(X) macro to centralize various
assumptions into a single place.
At skb alloc phase, we put skb_shared_info struct at the exact end of
skb head, to allow a better use of memory (lowering number of
reallocations), since kmalloc() gives us power-of-two memory blocks.
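For illustration, here is a minimal userspace sketch of the resulting
arithmetic, with placeholder struct sizes (the real sizeof values depend
on kernel version and config):

#include <stdio.h>

#define SMP_CACHE_BYTES 64
#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
                           ~(SMP_CACHE_BYTES - 1))

/* placeholder sizes, for illustration only */
#define SIZEOF_SK_BUFF          232
#define SIZEOF_SKB_SHARED_INFO  320

#define SKB_TRUESIZE(X) ((X) + \
                         SKB_DATA_ALIGN(SIZEOF_SK_BUFF) + \
                         SKB_DATA_ALIGN(SIZEOF_SKB_SHARED_INFO))

int main(void)
{
        /* 256 bytes of data really cost 256 + 256 + 320 = 832 bytes */
        printf("SKB_TRUESIZE(256) = %d\n", SKB_TRUESIZE(256));
        return 0;
}

With these assumed sizes, the old estimate of sizeof(struct sk_buff) +
256 = 488 bytes undercounts the real 832-byte cost by over 40%.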
Note: This patch might trigger performance regressions because of
misconfigured protocol stacks, hitting per-socket or global memory
limits that were previously not reached. But it's a necessary step for
more accurate memory accounting.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <ak@linux.intel.com>
---
include/linux/skbuff.h | 5 +++++
net/core/skbuff.c | 13 +++++++++----
net/core/sock.c | 2 +-
net/ipv4/icmp.c | 5 ++---
net/ipv4/tcp_input.c | 14 +++++++-------
net/ipv6/icmp.c | 3 +--
net/iucv/af_iucv.c | 2 +-
net/sctp/protocol.c | 2 +-
8 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac6b05a..64f8695 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -46,6 +46,11 @@
#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+/* return minimum truesize of one skb containing X bytes of data */
+#define SKB_TRUESIZE(X) ((X) + \
+ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
/* A. Checksumming of received packets by device.
*
* NONE: device failed to checksum this packet.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5b2c5f1..be66154 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
goto out;
prefetchw(skb);
- size = SKB_DATA_ALIGN(size);
- data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
- gfp_mask, node);
+ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ data = kmalloc_node_track_caller(size, gfp_mask, node);
if (!data)
goto nodata;
+ /* kmalloc(size) might give us more room than asked.
+ * Put skb_shared_info exactly at the end of allocated zone,
+ * to allow max possible filling before reallocation.
+ */
+ size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size);
/*
@@ -197,7 +201,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->truesize = size + sizeof(struct sk_buff);
+ /* Account for allocated memory : skb + skb->head */
+ skb->truesize = SKB_TRUESIZE(size);
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
diff --git a/net/core/sock.c b/net/core/sock.c
index 83c462d..5a08762 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
* not depend upon such differences.
*/
#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
+#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 23ef31b..ab188ae 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.icmp_sk[i] = sk;
/* Enough space for 2 64K ICMP packets, including
- * sk_buff struct overhead.
+ * sk_buff/skb_shared_info struct overhead.
*/
- sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
/*
* Speedup sock_wfree()
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 81cae64..c1653fe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
static void tcp_fixup_sndbuf(struct sock *sk)
{
- int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
- sizeof(struct sk_buff);
+ int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
if (sk->sk_sndbuf < 3 * sndmem) {
sk->sk_sndbuf = 3 * sndmem;
@@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+ int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
/* Try to select rcvbuf so that 4 mss-sized segments
* will fit to window and corresponding skbs will fit to our rcvbuf.
@@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
space /= tp->advmss;
if (!space)
space = 1;
- rcvmem = (tp->advmss + MAX_TCP_HEADER +
- 16 + sizeof(struct sk_buff));
+ rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128;
space *= rcvmem;
@@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_should_expand_sndbuf(sk)) {
- int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
- MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+ int sndmem = SKB_TRUESIZE(max_t(u32,
+ tp->rx_opt.mss_clamp,
+ tp->mss_cache) +
+ MAX_TCP_HEADER);
int demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
sndmem *= 2 * demanded;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2b59154..90868fb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
- sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
}
return 0;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c39f3a4..274d150 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
goto save_message;
len = atomic_read(&sk->sk_rmem_alloc);
- len += iucv_msg_length(msg) + sizeof(struct sk_buff);
+ len += SKB_TRUESIZE(iucv_msg_length(msg));
if (len > sk->sk_rcvbuf)
goto save_message;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91784f4..61b9fca 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void)
max_share = min(4UL*1024*1024, limit);
sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
- sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
+ sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
* Re: [PATCH net-next] net: more accurate skb truesize
From: Ben Hutchings @ 2011-10-13 16:05 UTC
To: Eric Dumazet; +Cc: David Miller, netdev, Andi Kleen
On Thu, 2011-10-13 at 17:26 +0200, Eric Dumazet wrote:
> skb truesize currently accounts for sk_buff struct and part of skb head.
>
> Considering that skb_shared_info is larger than sk_buff, it's time to
> take it into account for better memory accounting.
>
> This patch introduces SKB_TRUESIZE(X) macro to centralize various
> assumptions into a single place.
>
> At skb alloc phase, we put skb_shared_info struct at the exact end of
> skb head, to allow a better use of memory (lowering number of
> reallocations), since kmalloc() gives us power-of-two memory blocks.
[...]
> index 5b2c5f1..be66154 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -184,11 +184,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
> goto out;
> prefetchw(skb);
>
> - size = SKB_DATA_ALIGN(size);
> - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
> - gfp_mask, node);
> + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
[...]
If we want to put the data and skb_shared_info on separate cache-lines
then we should use:
size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
(which is effectively what we're doing now).
If that's not important, and we just want to be sure that the allocation
occupies at least a whole cache line, then it should be:
size = SKB_DATA_ALIGN(size + sizeof(struct skb_shared_info));
But I don't think it makes sense to use
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
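For concreteness, the three expressions compare as follows for a
100-byte request, assuming a 64-byte cache line and an illustrative
320-byte skb_shared_info:

#include <stdio.h>

#define SMP_CACHE_BYTES 64
#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
                           ~(SMP_CACHE_BYTES - 1))
#define SHINFO 320      /* assumed sizeof(struct skb_shared_info) */

int main(void)
{
        int size = 100;

        /* current code: data padded to a cache line, shinfo appended */
        printf("%d\n", SKB_DATA_ALIGN(size) + SHINFO);  /* 448 */
        /* alternative: pad the whole allocation */
        printf("%d\n", SKB_DATA_ALIGN(size + SHINFO));  /* 448 */
        /* the patch: only shinfo padded, data left as requested */
        printf("%d\n", size + SKB_DATA_ALIGN(SHINFO));  /* 420 */
        return 0;
}

(With these assumed numbers the first two coincide; the patch's request
is 28 bytes smaller.)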
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
* Re: [PATCH net-next] net: more accurate skb truesize
From: Eric Dumazet @ 2011-10-13 16:24 UTC
To: Ben Hutchings; +Cc: David Miller, netdev, Andi Kleen
On Thursday 13 October 2011 at 17:05 +0100, Ben Hutchings wrote:
> On Thu, 2011-10-13 at 17:26 +0200, Eric Dumazet wrote:
> > skb truesize currently accounts for sk_buff struct and part of skb head.
> >
> > Considering that skb_shared_info is larger than sk_buff, it's time to
> > take it into account for better memory accounting.
> >
> > This patch introduces SKB_TRUESIZE(X) macro to centralize various
> > assumptions into a single place.
> >
> > At skb alloc phase, we put skb_shared_info struct at the exact end of
> > skb head, to allow a better use of memory (lowering number of
> > reallocations), since kmalloc() gives us power-of-two memory blocks.
> [...]
> > index 5b2c5f1..be66154 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -184,11 +184,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
> > goto out;
> > prefetchw(skb);
> >
> > - size = SKB_DATA_ALIGN(size);
> > - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
> > - gfp_mask, node);
> > + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> [...]
>
> If we want to put the data and skb_shared_info on separate cache-lines
> then we should use:
> size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
> (which is effectively what we're doing now).
>
Same behavior after my patch: skb_shared_info starts at a cache-line
boundary, as before, unless kmalloc() gives us unaligned memory (it can
in certain debugging situations).
So the part before skb_shared_info will also be a multiple of
SMP_CACHE_BYTES, because of kmalloc() behavior.
> If that's not important, and we just want to be sure that the allocation
> occupies at least a whole cache line, then it should be:
> size = SKB_DATA_ALIGN(size + sizeof(struct skb_shared_info));
>
> But I don't think it makes sense to use
> SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
If you take a closer look, you'll see that my patch addresses your
concerns, but at minimal cpu cost.
kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
will give the same result as:
kmalloc(SKB_DATA_ALIGN(size) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
But my version is a bit faster (a single add of a compiler-known
constant).
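To make this concrete, a toy model of the reclaim (fake_ksize() stands
in for the real allocator; the 320-byte aligned shinfo size is again
only an assumption):

#include <stdio.h>

/* assumed SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
#define SHINFO_ALIGNED 320
#define SKB_WITH_OVERHEAD(X) ((X) - SHINFO_ALIGNED)

/* model kmalloc()'s power-of-two size classes */
static unsigned int fake_ksize(unsigned int req)
{
        unsigned int sz = 32;

        while (sz < req)
                sz <<= 1;
        return sz;
}

int main(void)
{
        unsigned int size = 1200 + SHINFO_ALIGNED;      /* 1520 requested */
        unsigned int head = SKB_WITH_OVERHEAD(fake_ksize(size));

        /* 2048-byte slab object -> 1728 usable head bytes, not 1200 */
        printf("usable head = %u\n", head);
        return 0;
}

Both the 1520-byte request and a fully cache-aligned 1536-byte one land
in the same 2048-byte object here, so nothing is lost by the cheaper
expression.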
* Re: [PATCH net-next] net: more accurate skb truesize
From: Ben Hutchings @ 2011-10-13 16:32 UTC
To: Eric Dumazet; +Cc: David Miller, netdev, Andi Kleen
On Thu, 2011-10-13 at 18:24 +0200, Eric Dumazet wrote:
> On Thursday 13 October 2011 at 17:05 +0100, Ben Hutchings wrote:
[...]
> > If that's not important, and we just want to be sure that the allocation
> > occupies at least a whole cache line, then it should be:
> > size = SKB_DATA_ALIGN(size + sizeof(struct skb_shared_info));
> >
> > But I don't think it makes sense to use
> > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
>
> If you take a closer look, you'll see that my patch addresses your
> concerns, but at minimal cpu cost.
>
> kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
>
> will give the same result as:
>
> kmalloc(SKB_DATA_ALIGN(size) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
>
> But my version is a bit faster (a single add of a compiler-known
> constant).
Fair enough, but please add a comment explaining this.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
* Re: [PATCH net-next] net: more accurate skb truesize
From: Eric Dumazet @ 2011-10-13 17:11 UTC
To: Ben Hutchings; +Cc: David Miller, netdev, Andi Kleen
On Thursday 13 October 2011 at 17:32 +0100, Ben Hutchings wrote:
> Fair enough, but please add a comment explaining this.
>
What about:
/* We do our best to align skb_shared_info on a separate cache
* line. It usually works because kmalloc(X > CACHE_BYTES) gives
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
*/
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
* Re: [PATCH net-next] net: more accurate skb truesize
From: Ben Hutchings @ 2011-10-13 17:13 UTC
To: Eric Dumazet; +Cc: David Miller, netdev, Andi Kleen
On Thu, 2011-10-13 at 19:11 +0200, Eric Dumazet wrote:
> On Thursday 13 October 2011 at 17:32 +0100, Ben Hutchings wrote:
>
> > Fair enough, but please add a comment explaining this.
> >
>
>
> What about:
>
> /* We do our best to align skb_shared_info on a separate cache
> * line. It usually works because kmalloc(X > CACHE_BYTES) gives
> * aligned memory blocks, unless SLUB/SLAB debug is enabled.
> */
> size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
Yes, that seems like a good explanation.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
* Re: [PATCH net-next] net: more accurate skb truesize
From: Eric Dumazet @ 2011-10-13 17:28 UTC
To: Ben Hutchings; +Cc: David Miller, netdev, Andi Kleen
On Thursday 13 October 2011 at 18:13 +0100, Ben Hutchings wrote:
> On Thu, 2011-10-13 at 19:11 +0200, Eric Dumazet wrote:
> > On Thursday 13 October 2011 at 17:32 +0100, Ben Hutchings wrote:
> >
> > > Fair enough, but please add a comment explaining this.
> > >
> >
> >
> > What about:
> >
> > /* We do our best to align skb_shared_info on a separate cache
> > * line. It usually works because kmalloc(X > CACHE_BYTES) gives
> > * aligned memory blocks, unless SLUB/SLAB debug is enabled.
> > */
> > size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>
> Yes, that seems like a good explanation.
Thanks!
Here is the second version of the patch, including this comment.
[PATCH V2 net-next] net: more accurate skb truesize
skb truesize currently accounts for sk_buff struct and part of skb head.
kmalloc() roundings are also ignored.
Considering that skb_shared_info is larger than sk_buff, it's time to
take it into account for better memory accounting.
This patch introduces SKB_TRUESIZE(X) macro to centralize various
assumptions into a single place.
At skb alloc phase, we put skb_shared_info struct at the exact end of
skb head, to allow a better use of memory (lowering number of
reallocations), since kmalloc() gives us power-of-two memory blocks.
Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info are
aligned to cache lines, as before.
Note: This patch might trigger performance regressions because of
misconfigured protocol stacks, hitting per-socket or global memory
limits that were previously not reached. But it's a necessary step for
more accurate memory accounting.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
---
include/linux/skbuff.h | 5 +++++
net/core/skbuff.c | 18 ++++++++++++++----
net/core/sock.c | 2 +-
net/ipv4/icmp.c | 5 ++---
net/ipv4/tcp_input.c | 14 +++++++-------
net/ipv6/icmp.c | 3 +--
net/iucv/af_iucv.c | 2 +-
net/sctp/protocol.c | 2 +-
8 files changed, 32 insertions(+), 19 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac6b05a..64f8695 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -46,6 +46,11 @@
#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+/* return minimum truesize of one skb containing X bytes of data */
+#define SKB_TRUESIZE(X) ((X) + \
+ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
/* A. Checksumming of received packets by device.
*
* NONE: device failed to checksum this packet.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5b2c5f1..a7f855d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
goto out;
prefetchw(skb);
- size = SKB_DATA_ALIGN(size);
- data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
- gfp_mask, node);
+ /* We do our best to align skb_shared_info on a separate cache
+ * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+ * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+ * Both skb->head and skb_shared_info are cache line aligned.
+ */
+ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ data = kmalloc_node_track_caller(size, gfp_mask, node);
if (!data)
goto nodata;
+ /* kmalloc(size) might give us more room than requested.
+ * Put skb_shared_info exactly at the end of allocated zone,
+ * to allow max possible filling before reallocation.
+ */
+ size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size);
/*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->truesize = size + sizeof(struct sk_buff);
+ /* Account for allocated memory : skb + skb->head */
+ skb->truesize = SKB_TRUESIZE(size);
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
diff --git a/net/core/sock.c b/net/core/sock.c
index 83c462d..5a08762 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
* not depend upon such differences.
*/
#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
+#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 23ef31b..ab188ae 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.icmp_sk[i] = sk;
/* Enough space for 2 64K ICMP packets, including
- * sk_buff struct overhead.
+ * sk_buff/skb_shared_info struct overhead.
*/
- sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
/*
* Speedup sock_wfree()
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 81cae64..c1653fe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
static void tcp_fixup_sndbuf(struct sock *sk)
{
- int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
- sizeof(struct sk_buff);
+ int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
if (sk->sk_sndbuf < 3 * sndmem) {
sk->sk_sndbuf = 3 * sndmem;
@@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+ int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
/* Try to select rcvbuf so that 4 mss-sized segments
* will fit to window and corresponding skbs will fit to our rcvbuf.
@@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
space /= tp->advmss;
if (!space)
space = 1;
- rcvmem = (tp->advmss + MAX_TCP_HEADER +
- 16 + sizeof(struct sk_buff));
+ rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < tp->advmss)
rcvmem += 128;
space *= rcvmem;
@@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_should_expand_sndbuf(sk)) {
- int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
- MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+ int sndmem = SKB_TRUESIZE(max_t(u32,
+ tp->rx_opt.mss_clamp,
+ tp->mss_cache) +
+ MAX_TCP_HEADER);
int demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
sndmem *= 2 * demanded;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2b59154..90868fb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
- sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
}
return 0;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c39f3a4..274d150 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
goto save_message;
len = atomic_read(&sk->sk_rmem_alloc);
- len += iucv_msg_length(msg) + sizeof(struct sk_buff);
+ len += SKB_TRUESIZE(iucv_msg_length(msg));
if (len > sk->sk_rcvbuf)
goto save_message;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91784f4..61b9fca 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void)
max_share = min(4UL*1024*1024, limit);
sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
- sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
+ sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
* Re: [PATCH net-next] net: more accurate skb truesize
From: David Miller @ 2011-10-13 20:05 UTC
To: eric.dumazet; +Cc: bhutchings, netdev, ak
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 13 Oct 2011 19:28:54 +0200
> [PATCH V2 net-next] net: more accurate skb truesize
>
> skb truesize currently accounts for sk_buff struct and part of skb head.
> kmalloc() roundings are also ignored.
>
> Considering that skb_shared_info is larger than sk_buff, it's time to
> take it into account for better memory accounting.
>
> This patch introduces SKB_TRUESIZE(X) macro to centralize various
> assumptions into a single place.
>
> At skb alloc phase, we put skb_shared_info struct at the exact end of
> skb head, to allow a better use of memory (lowering number of
> reallocations), since kmalloc() gives us power-of-two memory blocks.
>
> Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info are
> aligned to cache lines, as before.
>
> Note: This patch might trigger performance regressions because of
> misconfigured protocol stacks, hitting per-socket or global memory
> limits that were previously not reached. But it's a necessary step for
> more accurate memory accounting.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Applied.
* Re: [PATCH net-next] net: more accurate skb truesize
From: Andi Kleen @ 2011-10-13 20:33 UTC
To: Eric Dumazet; +Cc: David Miller, netdev
On Thu, Oct 13, 2011 at 05:26:21PM +0200, Eric Dumazet wrote:
> skb truesize currently accounts for sk_buff struct and part of skb head.
>
> > Considering that skb_shared_info is larger than sk_buff, it's time to
> take it into account for better memory accounting.
>
> This patch introduces SKB_TRUESIZE(X) macro to centralize various
> assumptions into a single place.
It's still quite inaccurate, especially for the kmalloced data area if it's not
paged. It would be better to ask slab how much memory was really
allocated. But at least this could be done more easily now with the new
macro, so it's definitely a step in the right direction.
-Andi
* Re: [PATCH net-next] net: more accurate skb truesize
From: Eric Dumazet @ 2011-10-13 20:51 UTC
To: Andi Kleen; +Cc: David Miller, netdev
On Thursday 13 October 2011 at 13:33 -0700, Andi Kleen wrote:
> On Thu, Oct 13, 2011 at 05:26:21PM +0200, Eric Dumazet wrote:
> > skb truesize currently accounts for sk_buff struct and part of skb head.
> >
> > Considering that skb_shared_info is larger than sk_buff, it's time to
> > take it into account for better memory accounting.
> >
> > This patch introduces SKB_TRUESIZE(X) macro to centralize various
> > assumptions into a single place.
>
> It's still quite inaccurate, especially for the kmalloced data area if it's not
> paged. It would be better to ask slab how much memory was really
> allocated. But at least this could be done more easily now with the new
> macro, so it's definitely a step in the right direction.
Note: in the __alloc_skb() function, SKB_TRUESIZE(size) delivers the
exact value: I do the ksize(data) call to ask how many bytes kmalloc()
provided.
So skb->truesize is quite accurate (unless KMEMCHECK or other debug
stuff is used, of course).
For the SKB_TRUESIZE() macro itself, we don't want to do a dummy call
to kmalloc()/kfree(), since it's basically used to roughly set a queue
limit.
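A sketch of those two roles, reusing the toy fake_ksize() model and the
placeholder overheads from the earlier examples (256-byte aligned
sk_buff, 320-byte aligned shinfo; illustrative numbers only):

#include <stdio.h>

#define SHINFO          320             /* assumed aligned shinfo size */
#define SKB_OVERHEAD    (256 + SHINFO)  /* assumed sk_buff + shinfo */
#define SKB_TRUESIZE(X) ((X) + SKB_OVERHEAD)

/* toy model of kmalloc()'s power-of-two size classes */
static unsigned int fake_ksize(unsigned int req)
{
        unsigned int sz = 32;

        while (sz < req)
                sz <<= 1;
        return sz;
}

int main(void)
{
        unsigned int size;

        /* queue limit: cheap compile-time lower bound, no allocator call */
        printf("estimate: %d\n", SKB_TRUESIZE(1500));           /* 2076 */

        /* __alloc_skb() path: "size" is refitted to the real slab object
         * via ksize(), so the same macro then gives the exact cost
         */
        size = fake_ksize(1500 + SHINFO) - SHINFO;              /* 1728 */
        printf("exact:    %u\n", SKB_TRUESIZE(size));           /* 2304 */
        return 0;
}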
Thanks!