* purpose of the skb head pool
2003-04-29 11:55 purpose of the skb head pool Christoph Hellwig
2003-04-29 13:03 ` Andi Kleen
@ 2003-04-29 13:05 ` Robert Olsson
2003-05-01 10:38 ` Florian Weimer
1 sibling, 1 reply; 11+ messages in thread
From: Robert Olsson @ 2003-04-29 13:05 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: davem, netdev
[-- Attachment #1: message body text --]
[-- Type: text/plain, Size: 1105 bytes --]
Hello!
Christoph Hellwig writes:
> net/core/skbuff.c has a small per-cpu pool to keep some hot skbufs around
> instead of returning them to the system allocator. But if you look
> at the slab allocator we'll have exactly that same code duplicated in
> there (see functions ac_data, __cache_alloc and kmem_cache_alloc in
> slab.c). So is there some other reason why this pool is needed?
Well, I just happened to test without it yesterday...
Manfred is working on some improvements of the slab (magazine layer) so I
tested this. It does seem to improve performance. I also removed the
skb_head_pool for a test run.
2.5.66 IP forwarding of two input simplex flows: eth0->eth1, eth2->eth3.
Fixed affinity CPU0: eth0, eth3. CPU1: eth1, eth2. This is common for routing
and should be the "worst case" for other use. The test should give a very high
load on the packet memory system. As seen, at least we don't see any
improvement from the skb_head_pool code.
Vanilla 2.5.66 381 kpps
Magazine 431 kpps
Magazine + no skb_head_pool 435 kpps
Cheers.
--ro
[-- Attachment #2: rem_skb_head_pool.pat --]
[-- Type: application/octet-stream, Size: 3581 bytes --]
--- linux/net/core/skbuff.c.030428 2003-04-01 13:24:14.000000000 +0200
+++ linux/net/core/skbuff.c 2003-04-28 16:53:54.000000000 +0200
@@ -20,7 +20,7 @@
* Ray VanTassle : Fixed --skb->lock in free
* Alan Cox : skb_copy copy arp field
* Andi Kleen : slabified it.
+ * Robert Olsson : Removed skb_head_pool
*
* NOTE:
* The __skb_ routines should be called with interrupts
@@ -64,15 +64,8 @@
#include <asm/uaccess.h>
#include <asm/system.h>
-int sysctl_hot_list_len = 128;
-
static kmem_cache_t *skbuff_head_cache;
-static union {
- struct sk_buff_head list;
- char pad[SMP_CACHE_BYTES];
-} skb_head_pool[NR_CPUS];
-
/*
* Keep out-of-line to prevent kernel bloat.
* __builtin_return_address is not used because it is not always
@@ -110,44 +103,6 @@
BUG();
}
-static __inline__ struct sk_buff *skb_head_from_pool(void)
-{
- struct sk_buff_head *list;
- struct sk_buff *skb = NULL;
- unsigned long flags;
-
- local_irq_save(flags);
-
- list = &skb_head_pool[smp_processor_id()].list;
-
- if (skb_queue_len(list))
- skb = __skb_dequeue(list);
-
- local_irq_restore(flags);
- return skb;
-}
-
-static __inline__ void skb_head_to_pool(struct sk_buff *skb)
-{
- struct sk_buff_head *list;
- unsigned long flags;
-
- local_irq_save(flags);
-
- list = &skb_head_pool[smp_processor_id()].list;
-
- if (skb_queue_len(list) < sysctl_hot_list_len) {
- __skb_queue_head(list, skb);
- local_irq_restore(flags);
-
- return;
- }
-
- local_irq_restore(flags);
- kmem_cache_free(skbuff_head_cache, skb);
-}
-
-
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
@@ -182,13 +137,10 @@
}
/* Get the HEAD */
- skb = skb_head_from_pool();
- if (!skb) {
- skb = kmem_cache_alloc(skbuff_head_cache,
+ skb = kmem_cache_alloc(skbuff_head_cache,
gfp_mask & ~__GFP_DMA);
if (!skb)
goto out;
- }
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
@@ -207,7 +159,7 @@
out:
return skb;
nodata:
- skb_head_to_pool(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
skb = NULL;
goto out;
}
@@ -257,7 +209,7 @@
void kfree_skbmem(struct sk_buff *skb)
{
skb_release_data(skb);
- skb_head_to_pool(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
}
/**
@@ -327,13 +279,10 @@
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
- struct sk_buff *n = skb_head_from_pool();
+ struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
- if (!n) {
- n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
- if (!n)
- return NULL;
- }
+ if (!n)
+ return NULL;
#define C(x) n->x = skb->x
@@ -1240,7 +1189,4 @@
NULL, NULL);
if (!skbuff_head_cache)
panic("cannot create skbuff cache");
-
- for (i = 0; i < NR_CPUS; i++)
- skb_queue_head_init(&skb_head_pool[i].list);
}
--- linux/net/core/sysctl_net_core.c.030428 2003-03-24 23:00:18.000000000 +0100
+++ linux/net/core/sysctl_net_core.c 2003-04-28 16:59:05.000000000 +0200
@@ -28,7 +28,6 @@
extern int sysctl_core_destroy_delay;
extern int sysctl_optmem_max;
-extern int sysctl_hot_list_len;
#ifdef CONFIG_NET_DIVERT
extern char sysctl_divert_version[];
@@ -150,14 +149,6 @@
.mode = 0644,
.proc_handler = &proc_dointvec
},
- {
- .ctl_name = NET_CORE_HOT_LIST_LENGTH,
- .procname = "hot_list_length",
- .data = &sysctl_hot_list_len,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
#ifdef CONFIG_NET_DIVERT
{
.ctl_name = NET_CORE_DIVERT_VERSION,
[-- Attachment #3: message body text --]
[-- Type: text/plain, Size: 8542 bytes --]
Vanilla slab. Input streams 2*534 kpps
======================================
CPU0 CPU1
24: 9 65 IO-APIC-level eth2
25: 54545 13 IO-APIC-level eth3
26: 78 0 IO-APIC-level eth0
27: 23 62315 IO-APIC-level eth1
Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0 1500 0 3339339 8828613 8828613 6660669 33 0 0 0 BRU
eth1 1500 0 57 0 0 0 3339340 0 0 0 BRU
eth2 1500 0 3800145 8670213 8670213 6199858 27 0 0 0 BRU
eth3 1500 0 1 0 0 0 3800144 0 0 0 BRU
0032f44f 00000000 00002ffa 00000000 00000000 00000000 00000000 00000000 00000000
0039fc87 00000000 00003373 00000000 00000000 00000000 00000000 00000000 00000000
With slab magazine patch. Input streams 2*534 kpps
==================================================
Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0 1500 0 3936399 8370562 8370562 6063606 31 0 0 0 BRU
eth1 1500 0 58 0 0 0 3936403 0 0 0 BRU
eth2 1500 0 4142687 8308862 8308862 5857316 27 0 0 0 BRU
eth3 1500 0 1 0 0 0 4142686 0 0 0 BRU
003c1090 00000000 000034d1 00000000 00000000 00000000 00000000 00000000 00000000
003f3699 00000000 00003722 00000000 00000000 00000000 00000000 00000000 00000000
CPU0 CPU1
24: 9 81 IO-APIC-level eth2
25: 64461 14 IO-APIC-level eth3
26: 94 0 IO-APIC-level eth0
27: 20 67759 IO-APIC-level eth1
Daemon started.
Profiler running.
Stopping profiling.
Cpu type: P4 / Xeon
Cpu speed was (MHz estimation) : 1799.59
Counter 0 counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (count cycles when processor is active) count 180000
vma samples %-age symbol name
c02152c0 89894 13.7456 alloc_skb
c022af44 69317 10.5992 ip_output
c021fd90 64410 9.84889 qdisc_restart
c01c8608 60224 9.20882 e1000_clean_tx_irq
c021fbb8 48501 7.41626 eth_type_trans
c021558c 42143 6.44406 skb_release_data
c02156e4 36162 5.52951 __kfree_skb
c01378b8 26285 4.01922 kmalloc
c022645c 25858 3.95393 ip_route_input
c01c87c0 23820 3.6423 e1000_clean_rx_irq
c01c76c8 23261 3.55683 e1000_xmit_frame
c0218a60 17677 2.70298 dev_queue_xmit
c01374e0 12685 1.93966 cache_alloc_refill
c0137a00 11319 1.73078 kfree
c010fda0 11077 1.69378 do_gettimeofday
c0228314 10202 1.55998 ip_rcv
c0114050 8899 1.36074 get_offset_tsc
c021567c 8394 1.28352 kfree_skbmem
c0229700 6794 1.03887 ip_forward
c01c8bf0 6531 0.998651 e1000_alloc_rx_buffers
c01c8554 6379 0.975409 e1000_clean
c021cadc 5234 0.800328 neigh_resolve_output
c02190f0 5106 0.780755 netif_receive_skb
c01c8468 4658 0.712252 e1000_intr
c0114068 3542 0.541605 mark_offset_tsc
c0222320 2156 0.329673 pfifo_dequeue
Cpu type: P4 / Xeon
Cpu speed was (MHz estimation) : 1799.59
Counter 7 counted MISPRED_BRANCH_RETIRED events (retired mispredicted branches) with a unit mask of 0x01 (retired instruction is non-bogus) count 18000
vma samples %-age symbol name
c022af44 718 27.7113 ip_output
c021fd90 513 19.7993 qdisc_restart
c02156e4 360 13.8942 __kfree_skb
c01c8608 261 10.0733 e1000_clean_tx_irq
c0218a60 85 3.28059 dev_queue_xmit
c01c8bf0 85 3.28059 e1000_alloc_rx_buffers
c0137a00 53 2.04554 kfree
c01c87c0 50 1.92976 e1000_clean_rx_irq
c021558c 48 1.85257 skb_release_data
c01378b8 46 1.77538 kmalloc
c021fbb8 41 1.5824 eth_type_trans
c0222320 36 1.38942 pfifo_dequeue
c0228314 31 1.19645 ip_rcv
c01c76c8 30 1.15785 e1000_xmit_frame
c02152c0 24 0.926283 alloc_skb
c022645c 19 0.733308 ip_route_input
c01377d0 19 0.733308 cache_flusharray
c010c750 17 0.656117 do_IRQ
c01c8554 15 0.578927 e1000_clean
c01168b8 13 0.501737 end_level_ioapic_irq
c02190f0 12 0.463142 netif_receive_skb
c01374e0 12 0.463142 cache_alloc_refill
c0120eb0 11 0.424547 do_softirq
c0229700 10 0.385951 ip_forward
c01c8468 10 0.385951 e1000_intr
c01245e8 9 0.347356 run_timer_softirq
With slab magazine patch and skb_head_pool removed. Input streams 2*533 kpps
============================================================================
Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flags
eth0 1500 0 4070842 8257568 8257568 5929162 32 0 0 0 BRU
eth1 1500 0 60 0 0 0 4070844 0 0 0 BRU
eth2 1500 0 4097594 8285413 8285413 5902409 27 0 0 0 BRU
eth3 1500 0 1 0 0 0 4097593 0 0 0 BRU
003e1dc4 00000000 000036e4 00000000 00000000 00000000 00000000 00000000 00000000
003e866f 00000000 00003711 00000000 00000000 00000000 00000000 00000000 00000000
CPU0 CPU1
24: 9 156 IO-APIC-level eth2
25: 66506 8 IO-APIC-level eth3
26: 170 0 IO-APIC-level eth0
27: 23 66807 IO-APIC-level eth1
NMI: 0 0
LOC: 357638 357637
ERR: 0
MIS: 0
Daemon started.
Profiler running.
Stopping profiling.
Cpu type: P4 / Xeon
Cpu speed was (MHz estimation) : 1799.55
Counter 0 counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (count cycles when processor is active) count 180000
vma samples %-age symbol name
c02152c0 90698 12.1819 alloc_skb
c022adf4 81812 10.9884 ip_output
c021fc40 74295 9.97875 qdisc_restart
c01c8608 67518 9.06852 e1000_clean_tx_irq
c021fa68 55350 7.4342 eth_type_trans
c02154ec 45940 6.17032 skb_release_data
c02155f8 41834 5.61883 __kfree_skb
c01c87c0 31218 4.19297 e1000_clean_rx_irq
c022630c 29209 3.92314 ip_route_input
c01c76c8 27768 3.72959 e1000_xmit_frame
c01378b8 20298 2.72628 kmalloc
c0218910 19762 2.65428 dev_queue_xmit
c01374e0 14245 1.91328 cache_alloc_refill
c0137a00 13807 1.85445 kfree
c010fda0 13406 1.80059 do_gettimeofday
c0137874 12601 1.69247 kmem_cache_alloc
c02281c4 11403 1.53157 ip_rcv
c0114050 9854 1.32352 get_offset_tsc
c01379b8 8743 1.17429 kmem_cache_free
c01c8bf0 8369 1.12406 e1000_alloc_rx_buffers
c02295b0 7607 1.02172 ip_forward
c01c8554 6912 0.928368 e1000_clean
c01c8468 5636 0.756986 e1000_intr
c0218fa0 5413 0.727034 netif_receive_skb
c01d2194 4363 0.586006 ide_insw
c0114068 3972 0.533489 mark_offset_tsc
Cpu type: P4 / Xeon
Cpu speed was (MHz estimation) : 1799.55
Counter 7 counted MISPRED_BRANCH_RETIRED events (retired mispredicted branches) with a unit mask of 0x01 (retired instruction is non-bogus) count 18000
vma samples %-age symbol name
c022adf4 766 26.3049 ip_output
c021fc40 617 21.1882 qdisc_restart
c02155f8 378 12.9808 __kfree_skb
c01c8608 298 10.2335 e1000_clean_tx_irq
c01c8bf0 120 4.12088 e1000_alloc_rx_buffers
c0218910 114 3.91484 dev_queue_xmit
c01c87c0 85 2.91896 e1000_clean_rx_irq
c0137a00 71 2.43819 kfree
c021fa68 42 1.44231 eth_type_trans
c02154ec 40 1.37363 skb_release_data
c01c76c8 39 1.33929 e1000_xmit_frame
c01378b8 31 1.06456 kmalloc
c02221d0 27 0.927198 pfifo_dequeue
c02295b0 23 0.789835 ip_forward
c02281c4 22 0.755495 ip_rcv
c02152c0 20 0.686813 alloc_skb
c01c8468 17 0.583791 e1000_intr
c01377d0 17 0.583791 cache_flusharray
c01c8554 16 0.549451 e1000_clean
c022630c 15 0.51511 ip_route_input
c01374e0 14 0.480769 cache_alloc_refill
c01168b8 13 0.446429 end_level_ioapic_irq
c010c750 10 0.343407 do_IRQ
c0114050 9 0.309066 get_offset_tsc
c0110074 9 0.309066 timer_interrupt
c02191f4 8 0.274725 net_rx_action
^ permalink raw reply [flat|nested] 11+ messages in thread