--- linux-2.4.19.p3/include/linux/skbuff.h Fri Aug 2 17:39:46 2002 +++ linux-2.4.19.p4/include/linux/skbuff.h Wed Nov 6 22:21:52 2002 @@ -14,6 +14,8 @@ #ifndef _LINUX_SKBUFF_H #define _LINUX_SKBUFF_H +#define CONFIG_NET_SKB_RECYCLING + #include #include #include @@ -25,6 +27,7 @@ #include #include #include +#include /* PACKET_HOST */ #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ @@ -194,6 +197,11 @@ unsigned char *end; /* End pointer */ void (*destructor)(struct sk_buff *); /* Destruct function */ +#ifdef CONFIG_NET_SKB_RECYCLING + struct net_device *recycle_dev; /* Device we arrived on */ + int tag; /* Device private tag. */ +#endif + #ifdef CONFIG_NETFILTER /* Can be used for communication between hooks. */ unsigned long nfmark; @@ -1109,6 +1117,45 @@ #endif } + +/* + * Slab constructor for a skb head. + */ +static inline void skb_headerinit(void *p, kmem_cache_t *cache, + unsigned long flags) +{ + struct sk_buff *skb = p; + + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + skb->sk = NULL; + skb->stamp.tv_sec=0; /* No idea about time */ + skb->dev = NULL; + skb->dst = NULL; + memset(skb->cb, 0, sizeof(skb->cb)); + skb->pkt_type = PACKET_HOST; /* Default type */ + skb->ip_summed = 0; + skb->priority = 0; + skb->security = 0; /* By default packets are insecure */ + skb->destructor = NULL; + +#ifdef CONFIG_NET_SKB_RECYCLING + skb->recycle_dev = 0; +#endif + +#ifdef CONFIG_NETFILTER + skb->nfmark = skb->nfcache = 0; + skb->nfct = NULL; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif +#endif +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif +} + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ (skb != (struct sk_buff *)(queue)); \ --- linux-2.4.19.p3/net/core/skbuff.c Fri Aug 2 17:39:46 2002 +++ linux-2.4.19.p4/net/core/skbuff.c Tue Nov 5 22:02:57 2002 @@ -217,40 +217,6 @@ } -/* - * Slab constructor for a skb head. 
- */ -static inline void skb_headerinit(void *p, kmem_cache_t *cache, - unsigned long flags) -{ - struct sk_buff *skb = p; - - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - skb->sk = NULL; - skb->stamp.tv_sec=0; /* No idea about time */ - skb->dev = NULL; - skb->dst = NULL; - memset(skb->cb, 0, sizeof(skb->cb)); - skb->pkt_type = PACKET_HOST; /* Default type */ - skb->ip_summed = 0; - skb->priority = 0; - skb->security = 0; /* By default packets are insecure */ - skb->destructor = NULL; - -#ifdef CONFIG_NETFILTER - skb->nfmark = skb->nfcache = 0; - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -#endif -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif -} - static void skb_drop_fraglist(struct sk_buff *skb) { struct sk_buff *list = skb_shinfo(skb)->frag_list; @@ -326,8 +292,15 @@ #ifdef CONFIG_NETFILTER nf_conntrack_put(skb->nfct); #endif - skb_headerinit(skb, NULL, 0); /* clean state */ - kfree_skbmem(skb); + +#ifdef CONFIG_NET_SKB_RECYCLING + if(skb->recycle_dev && skb->recycle_dev->skb_recycle ) { + if(skb->recycle_dev->skb_recycle(skb)) return; + } +#endif + + skb_headerinit(skb, NULL, 0); /* clean state */ + kfree_skbmem(skb); } /** @@ -384,6 +357,9 @@ C(tail); C(end); n->destructor = NULL; +#ifdef CONFIG_NET_SKB_RECYCLING + skb->recycle_dev = 0; +#endif #ifdef CONFIG_NETFILTER C(nfmark); C(nfcache); @@ -428,6 +404,9 @@ new->pkt_type=old->pkt_type; new->stamp=old->stamp; new->destructor = NULL; +#ifdef CONFIG_NET_SKB_RECYCLING + new->recycle_dev = 0; +#endif new->security=old->security; #ifdef CONFIG_NETFILTER new->nfmark=old->nfmark; --- linux-2.4.19.p3/Documentation/networking/skb_recycling.txt Wed Dec 31 16:00:00 1969 +++ linux-2.4.19.p4/Documentation/networking/skb_recycling.txt Tue Nov 5 22:02:57 2002 @@ -0,0 +1,186 @@ + +skb reuse. +----------- + +Q: Why? +A: With skb recycling one has the option of recycling the skb with the + driver that allocated it in the first place. 
This decreases the need + to malloc memory for each packet, and also should help keep from + invalidating the cache so often. This all leads to higher performance + networking, and also provides better ways to tune a system to your high + performance needs. + +Q: Slab does the job already. +A: Yes, and RC uses slab for object coloring etc. but can have some advantages + from having a closer loop. Also there is some upcoming hardware that needs + this skb handling. + +Q: With this, memory will be allocated in "private" that the kernel cannot use? +A: Yes, true. But to deal with this a new driver method is added: "mem_reclaim". + This can be called by anyone (kernel) to ask the driver to give back + allocated memory. The amount of memory kept by the driver can be made + run-time adjustable easily, and it can also be specified at module load + time. + +Q: Isn't the same job to be done now at the driver instead of at kfree? +A: No, by knowing that the same skb is returned the driver/allocator can do + a minimal refresh of the skb header and avoid the relatively costly + alloc and free of the "data" part. Just a minimal "refresh" is needed + when the driver gets its old skb back. The skb was good before... + Also this can be used to re-route an skb to be initialized on the CPU where + it was created. With SMP the TX interrupt can come in on any CPU and this + causes cache bouncing. Eventually we can reduce this by marking + the skb where it was created and at the recycler put it back on that + list. Note that slab uses per-CPU lists but just puts the skb back on the + "current" slab. + +Q: SMP and L2 cache locality? +A: Driver can have "per-CPU" recycling and store recycled skb's in LIFO order; + this should result in L2 cache friendliness. Tests to be done... + +Q: Compatibility? Does it break "old" drivers? +A: No, because old drivers do not mark any recycler callback. Alloc and + kfree run as usual. + +Q: The skb's have a lot of state added as they travel through the IP + stack. How is this handled? 
+A: Well, we wait until the "states" are properly handled; we are in no hurry + to recycle the skb, and clearing of the states has to be done anyway. + +Q: Is it proven in "real life" yet? +A: No. It's research and under development. + It works for me. --Ben + + +1) Implementation + + +* Kernel part. + + + +* Driver part. + + +Recycling callback and skb buffers in e1000 +=========================================== +In the private driver field: + + +#ifdef CONFIG_NET_SKB_RECYCLING + unsigned int cnt[NR_CPUS]; + + union { + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; + } e1000_recycle[NR_CPUS]; + +#endif + + +The main recycler +================= + + +int skb_hotlist = 300; + +int e1000_recycle(struct sk_buff *skb) +{ + + /* Note! skb->recycle_dev CANNOT be NULL here */ + struct e1000_adapter *adapter = skb->recycle_dev->priv; + + /* Store for right CPU. For this we use skb->tag */ + struct sk_buff_head *list = &adapter->e1000_recycle[skb->tag].list; + + /* + decrease our outstanding skb's: + 1) either we store in the list OR + 2) we ignore it so it gets to kfree + */ + + adapter->cnt[smp_processor_id()]--; + + if (skb_queue_len(list) <= skb_hotlist) { + + /* LIFO queue for cache friendliness */ + + skb_queue_head(list, skb); + return 1; + } + return 0; +} + + +At open: +======== + + for (i=0; i<NR_CPUS; i++) { + skb_queue_head_init(&adapter->e1000_recycle[i].list); + } + +At close: +========= + + /* Schedule while outstanding skb's exist */ + + for (i=0; i<NR_CPUS; i++) { + while (adapter->cnt[i]) { + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + } + + for (i=0; i<NR_CPUS; i++) { + struct sk_buff_head *list = &adapter->e1000_recycle[i].list; + while ((skb=skb_dequeue(list))!=NULL) { + skb->recycle_dev = NULL; + kfree_skb(skb); + } + + } + + +When allocating RX buffers: +=========================== + + skb = skb_dequeue(list); /* Try recycler list */ + + if(skb) { + skb_headerinit(skb, NULL, 0); /* clean state */ + + /* NOTE. 
e1000 does not use dev_alloc_skb */ + + skb->data = skb->head; + skb->tail = skb->head; + skb->len = 0; + adapter->RC_hit++; + } + else adapter->RC_miss++; + + if(!skb) + + skb = alloc_skb(adapter->rx_buffer_len + reserve_len, GFP_ATOMIC); + + if(!skb) { + /* Better luck next round */ + break; + } + + adapter->cnt[smp_processor_id()]++; + skb->tag = smp_processor_id(); + skb->recycle_dev = netdev; + skb->recycle_dev->skb_recycle = e1000_recycle; + + +And to be a well behaved kernel citizen +======================================= +void e1000_mem_reclaim(struct net_device *dev) +{ +/* Someone (kernel probably) is asking us to reduce memory usage */ + + /* If we use RC we purge private buffers etc.*/ + /* TODO: */ + +}