* [PATCH net-next] net: more accurate skb truesize @ 2011-10-13 15:26 Eric Dumazet 2011-10-13 16:05 ` Ben Hutchings 2011-10-13 20:33 ` Andi Kleen 0 siblings, 2 replies; 10+ messages in thread From: Eric Dumazet @ 2011-10-13 15:26 UTC (permalink / raw) To: David Miller; +Cc: netdev, Andi Kleen skb truesize currently accounts for sk_buff struct and part of skb head. Considering that skb_shared_info is larger than sk_buff, its time to take it into account for better memory accounting. This patch introduces SKB_TRUESIZE(X) macro to centralize various assumptions into a single place. At skb alloc phase, we put skb_shared_info struct at the exact end of skb head, to allow a better use of memory (lowering number of reallocations), since kmalloc() gives us power-of-two memory blocks. Note: This patch might trigger performance regressions because of misconfigured protocol stacks, hitting per socket or global memory limits that were previously not reached. But its a necessary step for a more accurate memory accounting. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Andi Kleen <ak@linux.intel.com> --- include/linux/skbuff.h | 5 +++++ net/core/skbuff.c | 13 +++++++++---- net/core/sock.c | 2 +- net/ipv4/icmp.c | 5 ++--- net/ipv4/tcp_input.c | 14 +++++++------- net/ipv6/icmp.c | 3 +-- net/iucv/af_iucv.c | 2 +- net/sctp/protocol.c | 2 +- 8 files changed, 27 insertions(+), 19 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ac6b05a..64f8695 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -46,6 +46,11 @@ #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) +/* return minimum truesize of one skb containing X bytes of data */ +#define SKB_TRUESIZE(X) ((X) + \ + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + /* A. Checksumming of received packets by device. * * NONE: device failed to checksum this packet. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b2c5f1..be66154 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -184,11 +184,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto out; prefetchw(skb); - size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + data = kmalloc_node_track_caller(size, gfp_mask, node); if (!data) goto nodata; + /* kmalloc(size) might give us more room than asked. + * Put skb_shared_info exactly at the end of allocated zone, + * to allow max possible filling before reallocation. + */ + size = SKB_WITH_OVERHEAD(ksize(data)); prefetchw(data + size); /* @@ -197,7 +201,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = size + sizeof(struct sk_buff); + /* Account for allocated memory : skb + skb->head */ + skb->truesize = SKB_TRUESIZE(size); atomic_set(&skb->users, 1); skb->head = data; skb->data = data; diff --git a/net/core/sock.c b/net/core/sock.c index 83c462d..5a08762 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX]; * not depend upon such differences. */ #define _SK_MEM_PACKETS 256 -#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31b..ab188ae 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.icmp_sk[i] = sk; /* Enough space for 2 64K ICMP packets, including - * sk_buff struct overhead. + * sk_buff/skb_shared_info struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); /* * Speedup sock_wfree() diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 81cae64..c1653fe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) static void tcp_fixup_sndbuf(struct sock *sk) { - int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + - sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); if (sk->sk_sndbuf < 3 * sndmem) { sk->sk_sndbuf = 3 * sndmem; @@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) static void tcp_fixup_rcvbuf(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); /* Try to select rcvbuf so that 4 mss-sized segments * will fit to window and corresponding skbs will fit to our rcvbuf. @@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk) space /= tp->advmss; if (!space) space = 1; - rcvmem = (tp->advmss + MAX_TCP_HEADER + - 16 + sizeof(struct sk_buff)); + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; space *= rcvmem; @@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk)) { - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(max_t(u32, + tp->rx_opt.mss_clamp, + tp->mss_cache) + + MAX_TCP_HEADER); int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2b59154..90868fb 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net) /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } return 0; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c39f3a4..274d150 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) goto save_message; len = atomic_read(&sk->sk_rmem_alloc); - len += iucv_msg_length(msg) + sizeof(struct sk_buff); + len += SKB_TRUESIZE(iucv_msg_length(msg)); if (len > sk->sk_rcvbuf) goto save_message; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91784f4..61b9fca 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void) max_share = min(4UL*1024*1024, limit); sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 15:26 [PATCH net-next] net: more accurate skb truesize Eric Dumazet @ 2011-10-13 16:05 ` Ben Hutchings 2011-10-13 16:24 ` Eric Dumazet 2011-10-13 20:33 ` Andi Kleen 1 sibling, 1 reply; 10+ messages in thread From: Ben Hutchings @ 2011-10-13 16:05 UTC (permalink / raw) To: Eric Dumazet; +Cc: David Miller, netdev, Andi Kleen On Thu, 2011-10-13 at 17:26 +0200, Eric Dumazet wrote: > skb truesize currently accounts for sk_buff struct and part of skb head. > > Considering that skb_shared_info is larger than sk_buff, its time to > take it into account for better memory accounting. > > This patch introduces SKB_TRUESIZE(X) macro to centralize various > assumptions into a single place. > > At skb alloc phase, we put skb_shared_info struct at the exact end of > skb head, to allow a better use of memory (lowering number of > reallocations), since kmalloc() gives us power-of-two memory blocks. [...] > index 5b2c5f1..be66154 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -184,11 +184,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, > goto out; > prefetchw(skb); > > - size = SKB_DATA_ALIGN(size); > - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), > - gfp_mask, node); > + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); [...] If we want to put the data and skb_shared_info on separate cache-lines then we should use: size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); (which is effectively what we're doing now). If that's not important, and we just want to be sure that the allocation occupies at least a whole cache line, then it should be: size = SKB_DATA_ALIGN(size + sizeof(struct skb_shared_info)); But I don't think it makes sense to use SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). Ben. -- Ben Hutchings, Staff Engineer, Solarflare Not speaking for my employer; that's the marketing department's job. They asked us to note that Solarflare product names are trademarked. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 16:05 ` Ben Hutchings @ 2011-10-13 16:24 ` Eric Dumazet 2011-10-13 16:32 ` Ben Hutchings 0 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2011-10-13 16:24 UTC (permalink / raw) To: Ben Hutchings; +Cc: David Miller, netdev, Andi Kleen Le jeudi 13 octobre 2011 à 17:05 +0100, Ben Hutchings a écrit : > On Thu, 2011-10-13 at 17:26 +0200, Eric Dumazet wrote: > > skb truesize currently accounts for sk_buff struct and part of skb head. > > > > Considering that skb_shared_info is larger than sk_buff, its time to > > take it into account for better memory accounting. > > > > This patch introduces SKB_TRUESIZE(X) macro to centralize various > > assumptions into a single place. > > > > At skb alloc phase, we put skb_shared_info struct at the exact end of > > skb head, to allow a better use of memory (lowering number of > > reallocations), since kmalloc() gives us power-of-two memory blocks. > [...] > > index 5b2c5f1..be66154 100644 > > --- a/net/core/skbuff.c > > +++ b/net/core/skbuff.c > > @@ -184,11 +184,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, > > goto out; > > prefetchw(skb); > > > > - size = SKB_DATA_ALIGN(size); > > - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), > > - gfp_mask, node); > > + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > [...] > > If we want to put the data and skb_shared_info on separate cache-lines > then we should use: > size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); > (which is effectively what we're doing now). > Same behavior after my patch : skb_shared_info starts at a cache-line boundary, like before, unless kmalloc() gives us unaligned memory (it can in certain debugging situations) So previous part (before skb_shared_info) will also be a multiple of SMP_CACHE_BYTES because of kmalloc() behavior. > If that's not important, and we just want to be sure that the allocation > occupies at least a whole cache line, then it should be: > size = SKB_DATA_ALIGN(size + sizeof(struct skb_shared_info)); > > But I don't think it makes sense to use SKB_DATA_ALIGN(sizeof(struct > skb_shared_info)). If you take a closer look, you'll see that my patch addresses your concerns, but at minimal cpu cost. kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) will give same result than : kmalloc(SKB_DATA_ALIGN(size) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) But my version is a bit faster (a single add of a compiler known constant) ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 16:24 ` Eric Dumazet @ 2011-10-13 16:32 ` Ben Hutchings 2011-10-13 17:11 ` Eric Dumazet 0 siblings, 1 reply; 10+ messages in thread From: Ben Hutchings @ 2011-10-13 16:32 UTC (permalink / raw) To: Eric Dumazet; +Cc: David Miller, netdev, Andi Kleen On Thu, 2011-10-13 at 18:24 +0200, Eric Dumazet wrote: > Le jeudi 13 octobre 2011 à 17:05 +0100, Ben Hutchings a écrit : [...] > > If that's not important, and we just want to be sure that the allocation > > occupies at least a whole cache line, then it should be: > > size = SKB_DATA_ALIGN(size + sizeof(struct skb_shared_info)); > > > > But I don't think it makes sense to use SKB_DATA_ALIGN(sizeof(struct > > skb_shared_info)). > > If you take a closer look, you'll see that my patch addresses your > concerns, but at minimal cpu cost. > > kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) > > will give same result than : > > kmalloc(SKB_DATA_ALIGN(size) + SKB_DATA_ALIGN(sizeof(struct > skb_shared_info))) > > But my version is a bit faster (a single add of a compiler known > constant) Fair enough, but please add a comment explaining this. Ben. -- Ben Hutchings, Staff Engineer, Solarflare Not speaking for my employer; that's the marketing department's job. They asked us to note that Solarflare product names are trademarked. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 16:32 ` Ben Hutchings @ 2011-10-13 17:11 ` Eric Dumazet 2011-10-13 17:13 ` Ben Hutchings 0 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2011-10-13 17:11 UTC (permalink / raw) To: Ben Hutchings; +Cc: David Miller, netdev, Andi Kleen Le jeudi 13 octobre 2011 à 17:32 +0100, Ben Hutchings a écrit : > Fair enough, but please add a comment explaining this. > What about : /* We do our best to align skb_shared_info on a separate cache * line. It usually works because kmalloc(X > CACHE_BYTES) gives * aligned memory blocks, unless SLUB/SLAB debug is enabled. */ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 17:11 ` Eric Dumazet @ 2011-10-13 17:13 ` Ben Hutchings 2011-10-13 17:28 ` Eric Dumazet 0 siblings, 1 reply; 10+ messages in thread From: Ben Hutchings @ 2011-10-13 17:13 UTC (permalink / raw) To: Eric Dumazet; +Cc: David Miller, netdev, Andi Kleen On Thu, 2011-10-13 at 19:11 +0200, Eric Dumazet wrote: > Le jeudi 13 octobre 2011 à 17:32 +0100, Ben Hutchings a écrit : > > > Fair enough, but please add a comment explaining this. > > > > > What about : > > /* We do our best to align skb_shared_info on a separate cache > * line. It usually works because kmalloc(X > CACHE_BYTES) gives > * aligned memory blocks, unless SLUB/SLAB debug is enabled. > */ > size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); Yes, that seems like a good explanation. Ben. -- Ben Hutchings, Staff Engineer, Solarflare Not speaking for my employer; that's the marketing department's job. They asked us to note that Solarflare product names are trademarked. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 17:13 ` Ben Hutchings @ 2011-10-13 17:28 ` Eric Dumazet 2011-10-13 20:05 ` David Miller 0 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2011-10-13 17:28 UTC (permalink / raw) To: Ben Hutchings; +Cc: David Miller, netdev, Andi Kleen Le jeudi 13 octobre 2011 à 18:13 +0100, Ben Hutchings a écrit : > On Thu, 2011-10-13 at 19:11 +0200, Eric Dumazet wrote: > > Le jeudi 13 octobre 2011 à 17:32 +0100, Ben Hutchings a écrit : > > > > > Fair enough, but please add a comment explaining this. > > > > > > > > > What about : > > > > /* We do our best to align skb_shared_info on a separate cache > > * line. It usually works because kmalloc(X > CACHE_BYTES) gives > > * aligned memory blocks, unless SLUB/SLAB debug is enabled. > > */ > > size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > > Yes, that seems like a good explanation. Thanks ! Here is the second version of the patch, including this comment. [PATCH V2 net-next] net: more accurate skb truesize skb truesize currently accounts for sk_buff struct and part of skb head. kmalloc() roundings are also ignored. Considering that skb_shared_info is larger than sk_buff, its time to take it into account for better memory accounting. This patch introduces SKB_TRUESIZE(X) macro to centralize various assumptions into a single place. At skb alloc phase, we put skb_shared_info struct at the exact end of skb head, to allow a better use of memory (lowering number of reallocations), since kmalloc() gives us power-of-two memory blocks. Unless SLUB/SLUB debug is active, both skb->head and skb_shared_info are aligned to cache lines, as before. Note: This patch might trigger performance regressions because of misconfigured protocol stacks, hitting per socket or global memory limits that were previously not reached. But its a necessary step for a more accurate memory accounting. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Andi Kleen <ak@linux.intel.com> CC: Ben Hutchings <bhutchings@solarflare.com> --- include/linux/skbuff.h | 5 +++++ net/core/skbuff.c | 18 ++++++++++++++---- net/core/sock.c | 2 +- net/ipv4/icmp.c | 5 ++--- net/ipv4/tcp_input.c | 14 +++++++------- net/ipv6/icmp.c | 3 +-- net/iucv/af_iucv.c | 2 +- net/sctp/protocol.c | 2 +- 8 files changed, 32 insertions(+), 19 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ac6b05a..64f8695 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -46,6 +46,11 @@ #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) +/* return minimum truesize of one skb containing X bytes of data */ +#define SKB_TRUESIZE(X) ((X) + \ + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + /* A. Checksumming of received packets by device. * * NONE: device failed to checksum this packet. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b2c5f1..a7f855d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto out; prefetchw(skb); - size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); + /* We do our best to align skb_shared_info on a separate cache + * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives + * aligned memory blocks, unless SLUB/SLAB debug is enabled. + * Both skb->head and skb_shared_info are cache line aligned. + */ + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + data = kmalloc_node_track_caller(size, gfp_mask, node); if (!data) goto nodata; + /* kmalloc(size) might give us more room than requested. + * Put skb_shared_info exactly at the end of allocated zone, + * to allow max possible filling before reallocation. + */ + size = SKB_WITH_OVERHEAD(ksize(data)); prefetchw(data + size); /* @@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = size + sizeof(struct sk_buff); + /* Account for allocated memory : skb + skb->head */ + skb->truesize = SKB_TRUESIZE(size); atomic_set(&skb->users, 1); skb->head = data; skb->data = data; diff --git a/net/core/sock.c b/net/core/sock.c index 83c462d..5a08762 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX]; * not depend upon such differences. */ #define _SK_MEM_PACKETS 256 -#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31b..ab188ae 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.icmp_sk[i] = sk; /* Enough space for 2 64K ICMP packets, including - * sk_buff struct overhead. + * sk_buff/skb_shared_info struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); /* * Speedup sock_wfree() diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 81cae64..c1653fe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) static void tcp_fixup_sndbuf(struct sock *sk) { - int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + - sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); if (sk->sk_sndbuf < 3 * sndmem) { sk->sk_sndbuf = 3 * sndmem; @@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) static void tcp_fixup_rcvbuf(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); /* Try to select rcvbuf so that 4 mss-sized segments * will fit to window and corresponding skbs will fit to our rcvbuf. @@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk) space /= tp->advmss; if (!space) space = 1; - rcvmem = (tp->advmss + MAX_TCP_HEADER + - 16 + sizeof(struct sk_buff)); + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; space *= rcvmem; @@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk)) { - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(max_t(u32, + tp->rx_opt.mss_clamp, + tp->mss_cache) + + MAX_TCP_HEADER); int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2b59154..90868fb 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net) /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } return 0; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c39f3a4..274d150 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) goto save_message; len = atomic_read(&sk->sk_rmem_alloc); - len += iucv_msg_length(msg) + sizeof(struct sk_buff); + len += SKB_TRUESIZE(iucv_msg_length(msg)); if (len > sk->sk_rcvbuf) goto save_message; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91784f4..61b9fca 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void) max_share = min(4UL*1024*1024, limit); sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 17:28 ` Eric Dumazet @ 2011-10-13 20:05 ` David Miller 0 siblings, 0 replies; 10+ messages in thread From: David Miller @ 2011-10-13 20:05 UTC (permalink / raw) To: eric.dumazet; +Cc: bhutchings, netdev, ak From: Eric Dumazet <eric.dumazet@gmail.com> Date: Thu, 13 Oct 2011 19:28:54 +0200 > [PATCH V2 net-next] net: more accurate skb truesize > > skb truesize currently accounts for sk_buff struct and part of skb head. > kmalloc() roundings are also ignored. > > Considering that skb_shared_info is larger than sk_buff, its time to > take it into account for better memory accounting. > > This patch introduces SKB_TRUESIZE(X) macro to centralize various > assumptions into a single place. > > At skb alloc phase, we put skb_shared_info struct at the exact end of > skb head, to allow a better use of memory (lowering number of > reallocations), since kmalloc() gives us power-of-two memory blocks. > > Unless SLUB/SLUB debug is active, both skb->head and skb_shared_info are > aligned to cache lines, as before. > > Note: This patch might trigger performance regressions because of > misconfigured protocol stacks, hitting per socket or global memory > limits that were previously not reached. But its a necessary step for a > more accurate memory accounting. > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Applied. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 15:26 [PATCH net-next] net: more accurate skb truesize Eric Dumazet 2011-10-13 16:05 ` Ben Hutchings @ 2011-10-13 20:33 ` Andi Kleen 2011-10-13 20:51 ` Eric Dumazet 1 sibling, 1 reply; 10+ messages in thread From: Andi Kleen @ 2011-10-13 20:33 UTC (permalink / raw) To: Eric Dumazet; +Cc: David Miller, netdev On Thu, Oct 13, 2011 at 05:26:21PM +0200, Eric Dumazet wrote: > skb truesize currently accounts for sk_buff struct and part of skb head. > > Considering that skb_shared_info is larger than sk_buff, its time to > take it into account for better memory accounting. > > This patch introduces SKB_TRUESIZE(X) macro to centralize various > assumptions into a single place. It's still quite inaccurate, especially for the kmalloced data area if it's not paged. It would be better to ask slab how much memory was really allocated. But at least this could be done more easily now with the new macro, so it's definitely a step in the right direction. -Andi ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] net: more accurate skb truesize 2011-10-13 20:33 ` Andi Kleen @ 2011-10-13 20:51 ` Eric Dumazet 0 siblings, 0 replies; 10+ messages in thread From: Eric Dumazet @ 2011-10-13 20:51 UTC (permalink / raw) To: Andi Kleen; +Cc: David Miller, netdev Le jeudi 13 octobre 2011 à 13:33 -0700, Andi Kleen a écrit : > On Thu, Oct 13, 2011 at 05:26:21PM +0200, Eric Dumazet wrote: > > skb truesize currently accounts for sk_buff struct and part of skb head. > > > > Considering that skb_shared_info is larger than sk_buff, its time to > > take it into account for better memory accounting. > > > > This patch introduces SKB_TRUESIZE(X) macro to centralize various > > assumptions into a single place. > > It's still quite inaccurate, especially for the kmalloced data area if it's not > paged. It would be better to ask slab how much memory was really > allocated. But at least this could be done more easily now with the new > macro, so it's definitely a step in the right direction. Note : in skb_alloc() function, SKB_TRUESIZE(size) delivers the exact value : I do the ksize(data) call to ask how many byte kmalloc() provided me. So skb->truesize is quite accurate (unless KMEMCHECK or other debug stuff is used of course) For the SKB_TRUESIZE() macro, we dont want to do a dummy call to kmalloc()/kfree(), since its basically used to roughly set a queue limit. Thanks ! ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2011-10-13 20:51 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2011-10-13 15:26 [PATCH net-next] net: more accurate skb truesize Eric Dumazet 2011-10-13 16:05 ` Ben Hutchings 2011-10-13 16:24 ` Eric Dumazet 2011-10-13 16:32 ` Ben Hutchings 2011-10-13 17:11 ` Eric Dumazet 2011-10-13 17:13 ` Ben Hutchings 2011-10-13 17:28 ` Eric Dumazet 2011-10-13 20:05 ` David Miller 2011-10-13 20:33 ` Andi Kleen 2011-10-13 20:51 ` Eric Dumazet
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.