netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: xiaohui.xin@intel.com
To: netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, mst@redhat.com, mingo@elte.hu,
	davem@davemloft.net, herbert@gondor.apana.org.au,
	jdike@linux.intel.com
Cc: Xin Xiaohui <xiaohui.xin@intel.com>
Subject: [RFC PATCH v7 08/19] Make __alloc_skb() to get external buffer.
Date: Sat,  5 Jun 2010 18:14:47 +0800	[thread overview]
Message-ID: <1275732899-5423-8-git-send-email-xiaohui.xin@intel.com> (raw)
In-Reply-To: <1275732899-5423-7-git-send-email-xiaohui.xin@intel.com>

From: Xin Xiaohui <xiaohui.xin@intel.com>

Add a dev parameter to __alloc_skb(). When an external buffer is
used, skb->data points to it, skb->head is recomputed, the shinfo
of the external buffer is maintained, and the external buffer info
is recorded in the destructor_arg field.

Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Zhao Yu <yzhao81new@gmail.com>
Reviewed-by: Jeff Dike <jdike@linux.intel.com>
---

        __alloc_skb() cleanup by

        Jeff Dike <jdike@linux.intel.com>

 include/linux/skbuff.h |    7 ++++---
 net/core/skbuff.c      |   43 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 281a1c0..5ff8c27 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -442,17 +442,18 @@ extern void kfree_skb(struct sk_buff *skb);
 extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
-				   gfp_t priority, int fclone, int node);
+				   gfp_t priority, int fclone,
+				   int node, struct net_device *dev);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
-	return __alloc_skb(size, priority, 0, -1);
+	return __alloc_skb(size, priority, 0, -1, NULL);
 }
 
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
-	return __alloc_skb(size, priority, 1, -1);
+	return __alloc_skb(size, priority, 1, -1, NULL);
 }
 
 extern int skb_recycle_check(struct sk_buff *skb, int skb_size);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fbdb1f1..38d19d0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -161,7 +161,8 @@ EXPORT_SYMBOL(skb_under_panic);
  *	@fclone: allocate from fclone cache instead of head cache
  *		and allocate a cloned (child) skb
  *	@node: numa node to allocate memory on
- *
+ *	@dev: the device that owns the skb if the skb tries to get an external
+ *		buffer; otherwise NULL.
  *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  *	tail room of size bytes. The object has a reference count of one.
  *	The return is the buffer. On a failure the return is %NULL.
@@ -170,12 +171,13 @@ EXPORT_SYMBOL(skb_under_panic);
  *	%GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int fclone, int node, struct net_device *dev)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
-	u8 *data;
+	u8 *data = NULL;
+	struct skb_external_page *ext_page = NULL;
 
 	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
 
@@ -185,8 +187,23 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		goto out;
 
 	size = SKB_DATA_ALIGN(size);
-	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-			gfp_mask, node);
+
+	/* If the device wants to do mediate passthru (zero-copy),
+	 * the skb may try to get external buffers from outside.
+	 * If that fails, fall back to allocating buffers from the kernel.
+	 */
+	if (dev && dev->mp_port) {
+		ext_page = netdev_alloc_external_page(dev, skb, size);
+		if (ext_page) {
+			data = ext_page->start;
+			size = ext_page->size;
+		}
+	}
+
+	if (!data)
+		data = kmalloc_node_track_caller(
+				size + sizeof(struct skb_shared_info),
+				gfp_mask, node);
 	if (!data)
 		goto nodata;
 
@@ -208,6 +225,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->mac_header = ~0U;
 #endif
 
+	/* If the skb gets external buffers successfully, since the shinfo is
+	 * at the end of the buffer, we retain the shinfo in case it is
+	 * needed later.
+	 */
+	if (ext_page) {
+		skb->head = skb->data - NET_IP_ALIGN - NET_SKB_PAD;
+		memcpy(ext_page->ushinfo, skb_shinfo(skb),
+		       sizeof(struct skb_shared_info));
+	}
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -231,6 +257,11 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
+	/* Record the external buffer info in this field. It's not so good,
+	 * but we cannot find another place easily.
+	 */
+	shinfo->destructor_arg = ext_page;
+
 out:
 	return skb;
 nodata:
@@ -259,7 +290,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 	struct sk_buff *skb;
 
-	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node, dev);
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
-- 
1.5.4.4


  reply	other threads:[~2010-06-05 10:07 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-05 10:14 [RFC PATCH v7 01/19] Add a new structure for skb buffer from external xiaohui.xin
2010-06-05 10:14 ` [RFC PATCH v7 02/19] Add a new struct for device to manipulate external buffer xiaohui.xin
2010-06-05 10:14   ` [RFC PATCH v7 03/19] Export 2 func for device to assign/deassign new strucure xiaohui.xin
2010-06-05 10:14     ` [RFC PATCH v7 04/19] Add a ndo_mp_port_prep pointer to net_device_ops xiaohui.xin
2010-06-05 10:14       ` [RFC PATCH v7 05/19] Add a function make external buffer owner to query capability xiaohui.xin
2010-06-05 10:14         ` [RFC PATCH v7 06/19] Add a function to indicate if device use external buffer xiaohui.xin
2010-06-05 10:14           ` [RFC PATCH v7 07/19] Add interface to get external buffers xiaohui.xin
2010-06-05 10:14             ` xiaohui.xin [this message]
2010-06-05 10:14               ` [RFC PATCH v7 09/19] Ignore room skb_reserve() when device is using external buffer xiaohui.xin
2010-06-05 10:14                 ` [RFC PATCH v7 10/19] Don't do skb recycle, if device use " xiaohui.xin
2010-06-05 10:14                   ` [RFC PATCH v7 11/19] Use callback to deal with skb_release_data() specially xiaohui.xin
2010-06-05 10:14                     ` [RFC PATCH v7 12/19] Add a hook to intercept external buffers from NIC driver xiaohui.xin
2010-06-05 10:14                       ` [RFC PATCH v7 13/19] To skip GRO if buffer is external currently xiaohui.xin
2010-06-05 10:14                         ` [RFC PATCH v7 14/19] Add header file for mp device xiaohui.xin
2010-06-05 10:14                           ` [RFC PATCH v7 15/19] Add basic funcs and ioctl to " xiaohui.xin
2010-06-05 10:14                             ` [RFC PATCH v7 16/19] Manipulate external buffers in " xiaohui.xin
2010-06-05 10:14                               ` [RFC PATCH v7 17/19] Export proto_ops to vhost-net driver xiaohui.xin
2010-06-05 10:14                                 ` [RFC PATCH v7 18/19] Add a kconfig entry and make entry for mp device xiaohui.xin
2010-06-05 10:14                                   ` [RFC PATCH v7 19/19] Provides multiple submits and asynchronous notifications xiaohui.xin
2010-06-05 10:14                                     ` [RFC PATCH v7 00/19] Provide a zero-copy method on KVM virtio-net xiaohui.xin
2010-06-05 14:56                     ` [RFC PATCH v7 11/19] Use callback to deal with skb_release_data() specially Eric Dumazet
2010-06-09  7:30                       ` Xin, Xiaohui
2010-06-05 14:53               ` [RFC PATCH v7 08/19] Make __alloc_skb() to get external buffer Eric Dumazet
2010-06-09  7:34                 ` Xin, Xiaohui
2010-06-05 14:51     ` [RFC PATCH v7 03/19] Export 2 func for device to assign/deassign new strucure Eric Dumazet
2010-06-06 23:13 ` [RFC PATCH v7 01/19] Add a new structure for skb buffer from external Stephen Hemminger
2010-06-07  7:51   ` Andi Kleen
2010-06-07  8:17     ` Mitchell Erblich
2010-06-09  9:22       ` Xin, Xiaohui
2010-06-09  8:48     ` Xin, Xiaohui
2010-06-08  5:27   ` Herbert Xu
2010-06-09  9:54     ` Xin, Xiaohui
2010-06-11  5:21       ` Herbert Xu
2010-06-12  9:31         ` Xin, Xiaohui
2010-06-13  8:58           ` Xin, Xiaohui
2010-06-17 11:21             ` Herbert Xu
2010-06-18  5:26               ` Xin, Xiaohui
2010-06-18  5:59                 ` Herbert Xu
2010-06-18  7:14                   ` Xin, Xiaohui
2010-06-18  7:45                     ` Herbert Xu
2010-06-20 10:06                     ` Michael S. Tsirkin
2010-06-20 10:32                       ` Herbert Xu
2010-06-20 10:39                         ` Michael S. Tsirkin
2010-06-20 11:02                           ` Herbert Xu
2010-06-20 11:11                             ` Michael S. Tsirkin
2010-06-20 11:36                               ` Herbert Xu
2010-06-20 11:47                                 ` Michael S. Tsirkin
2010-06-20 11:59                                   ` Herbert Xu
2010-06-20 12:48                                     ` Michael S. Tsirkin
2010-06-20 15:19                                     ` Ben Hutchings
2010-06-23  8:09                 ` Dong, Eddie
2010-06-23  9:52                   ` Herbert Xu
2010-06-23 10:05                     ` Dong, Eddie
2010-06-24 10:08                       ` Herbert Xu
2010-06-25  1:03                         ` Dong, Eddie
2010-06-25 11:06                           ` Michael S. Tsirkin
2010-06-27  6:14                           ` Herbert Xu
2010-06-28  9:56                             ` Xin, Xiaohui
2010-06-28 10:00                               ` Michael S. Tsirkin
2010-07-03  9:12                               ` Herbert Xu
2010-06-25  2:07                         ` Xin, Xiaohui
2010-06-17 11:20           ` Herbert Xu
2010-06-09  8:29   ` Xin, Xiaohui

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1275732899-5423-8-git-send-email-xiaohui.xin@intel.com \
    --to=xiaohui.xin@intel.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=jdike@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).