From: Alexander Duyck <alexander.h.duyck@redhat.com>
To: netdev@vger.kernel.org
Cc: ast@plumgrid.com, davem@davemloft.net, eric.dumazet@gmail.com,
brouer@redhat.com
Subject: [net-next PATCH 2/6] net: Pull out core bits of __netdev_alloc_skb and add __napi_alloc_skb
Date: Tue, 09 Dec 2014 19:40:49 -0800 [thread overview]
Message-ID: <20141210034049.2114.84335.stgit@ahduyck-vm-fedora20> (raw)
In-Reply-To: <20141210033902.2114.68658.stgit@ahduyck-vm-fedora20>
This change pulls the core functionality out of __netdev_alloc_skb and
places them in a new function named __alloc_rx_skb. The reason for doing
this is to make these bits accessible to a new function __napi_alloc_skb.
In addition __alloc_rx_skb now has a new flags value that is used to
determine which page frag pool to allocate from. If the SKB_ALLOC_NAPI
flag is set then the NAPI pool is used. The advantage of this is that we
do not have to use local_irq_save/restore when accessing the NAPI pool from
NAPI context.
In my test setup I saw at least 11ns of savings using the napi_alloc_skb
function versus the netdev_alloc_skb function, most of this being due to
the fact that we didn't have to call local_irq_save/restore.
The main use case for napi_alloc_skb would be for things such as copybreak
or page fragment based receive paths where an skb is allocated after the
data has been received instead of before.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
include/linux/skbuff.h | 9 ++++++
net/core/dev.c | 2 +
net/core/skbuff.c | 74 +++++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 77 insertions(+), 8 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b2b53b0..af79302 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -151,6 +151,7 @@ struct net_device;
struct scatterlist;
struct pipe_inode_info;
struct iov_iter;
+struct napi_struct;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
@@ -673,6 +674,7 @@ struct sk_buff {
#define SKB_ALLOC_FCLONE 0x01
#define SKB_ALLOC_RX 0x02
+#define SKB_ALLOC_NAPI 0x04
/* Returns true if the skb was allocated from PFMEMALLOC reserves */
static inline bool skb_pfmemalloc(const struct sk_buff *skb)
@@ -2165,6 +2167,13 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
}
void *napi_alloc_frag(unsigned int fragsz);
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length, gfp_t gfp_mask);
+static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length)
+{
+ return __napi_alloc_skb(napi, length, GFP_ATOMIC);
+}
/**
* __dev_alloc_pages - allocate page for network Rx
diff --git a/net/core/dev.c b/net/core/dev.c
index 3f191da..80f798d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4172,7 +4172,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
struct sk_buff *skb = napi->skb;
if (!skb) {
- skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+ skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
napi->skb = skb;
}
return skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56ed17c..ae13ef6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -444,10 +444,13 @@ void *napi_alloc_frag(unsigned int fragsz)
EXPORT_SYMBOL(napi_alloc_frag);
/**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
+ * __alloc_rx_skb - allocate an skbuff for rx
* @length: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb
+ * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ * allocations in case we have to fallback to __alloc_skb()
+ * If SKB_ALLOC_NAPI is set, page fragment will be allocated
+ * from napi_cache instead of netdev_cache.
*
* Allocate a new &sk_buff and assign it a usage count of one. The
* buffer has unspecified headroom built in. Users should allocate
@@ -456,11 +459,11 @@ EXPORT_SYMBOL(napi_alloc_frag);
*
* %NULL is returned if there is no free memory.
*/
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
- unsigned int length, gfp_t gfp_mask)
+static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
+ int flags)
{
struct sk_buff *skb = NULL;
- unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
+ unsigned int fragsz = SKB_DATA_ALIGN(length) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
@@ -469,7 +472,9 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- data = __netdev_alloc_frag(fragsz, gfp_mask);
+ data = (flags & SKB_ALLOC_NAPI) ?
+ __napi_alloc_frag(fragsz, gfp_mask) :
+ __netdev_alloc_frag(fragsz, gfp_mask);
if (likely(data)) {
skb = build_skb(data, fragsz);
@@ -477,17 +482,72 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
put_page(virt_to_head_page(data));
}
} else {
- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+ skb = __alloc_skb(length, gfp_mask,
SKB_ALLOC_RX, NUMA_NO_NODE);
}
+ return skb;
+}
+
+/**
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+ unsigned int length, gfp_t gfp_mask)
+{
+ struct sk_buff *skb;
+
+ length += NET_SKB_PAD;
+ skb = __alloc_rx_skb(length, gfp_mask, 0);
+
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD);
skb->dev = dev;
}
+
return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);
+/**
+ * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ * @napi: napi instance this buffer was allocated for
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
+ *
+ * Allocate a new sk_buff for use in NAPI receive. This buffer will
+ * attempt to allocate the head from a special reserved region used
+ * only for NAPI Rx allocation. By doing this we can save several
+ * CPU cycles by avoiding having to disable and re-enable IRQs.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length, gfp_t gfp_mask)
+{
+ struct sk_buff *skb;
+
+ length += NET_SKB_PAD + NET_IP_ALIGN;
+ skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+
+ if (likely(skb)) {
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ skb->dev = napi->dev;
+ }
+
+ return skb;
+}
+EXPORT_SYMBOL(__napi_alloc_skb);
+
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size, unsigned int truesize)
{
next prev parent reply other threads:[~2014-12-10 3:40 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-12-10 3:40 [net-next PATCH 0/6] net: Alloc NAPI page frags from their own pool Alexander Duyck
2014-12-10 3:40 ` [net-next PATCH 1/6] net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag Alexander Duyck
2014-12-10 4:16 ` Alexei Starovoitov
2014-12-10 15:21 ` Alexander Duyck
2014-12-10 16:02 ` Eric Dumazet
2014-12-10 17:06 ` Alexander Duyck
2014-12-10 17:13 ` Eric Dumazet
2014-12-10 17:16 ` Alexander Duyck
2014-12-10 3:40 ` Alexander Duyck [this message]
2014-12-10 3:40 ` [net-next PATCH 3/6] ethernet/intel: Use napi_alloc_skb Alexander Duyck
2014-12-11 21:43 ` Jeff Kirsher
2014-12-10 3:41 ` [net-next PATCH 4/6] cxgb: Use napi_alloc_skb instead of netdev_alloc_skb_ip_align Alexander Duyck
2014-12-10 3:41 ` [net-next PATCH 5/6] ethernet/realtek: use " Alexander Duyck
2014-12-10 3:41 ` [net-next PATCH 6/6] ethernet/broadcom: Use " Alexander Duyck
2014-12-10 9:50 ` David Laight
2014-12-10 9:52 ` David Laight
2014-12-10 15:16 ` Alexander Duyck
2014-12-10 14:15 ` [RFC PATCH 0/3] Faster than SLAB caching of SKBs with qmempool (backed by alf_queue) Jesper Dangaard Brouer
2014-12-10 14:15 ` [RFC PATCH 1/3] lib: adding an Array-based Lock-Free (ALF) queue Jesper Dangaard Brouer
2014-12-11 19:15 ` David Miller
2014-12-10 14:15 ` [RFC PATCH 2/3] mm: qmempool - quick queue based memory pool Jesper Dangaard Brouer
2014-12-10 14:15 ` [RFC PATCH 3/3] net: use qmempool in-front of sk_buff kmem_cache Jesper Dangaard Brouer
2014-12-10 14:22 ` [RFC PATCH 0/3] Faster than SLAB caching of SKBs with qmempool (backed by alf_queue) David Laight
2014-12-10 14:40 ` Jesper Dangaard Brouer
2014-12-10 15:17 ` Christoph Lameter
2014-12-10 15:33 ` Jesper Dangaard Brouer
[not found] ` <20141210163321.0e4e4fd2-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2014-12-10 16:17 ` Christoph Lameter
2014-12-10 19:51 ` Christoph Lameter
2014-12-11 10:18 ` Jesper Dangaard Brouer
2014-12-10 18:32 ` [net-next PATCH 0/6] net: Alloc NAPI page frags from their own pool David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141210034049.2114.84335.stgit@ahduyck-vm-fedora20 \
--to=alexander.h.duyck@redhat.com \
--cc=ast@plumgrid.com \
--cc=brouer@redhat.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).