* [RFC] net: skbuff: let struct skb_ext live inside the head
@ 2023-02-15 3:44 Jakub Kicinski
2023-02-15 8:53 ` Paolo Abeni
2023-02-15 9:43 ` Florian Westphal
0 siblings, 2 replies; 8+ messages in thread
From: Jakub Kicinski @ 2023-02-15 3:44 UTC (permalink / raw)
To: davem; +Cc: netdev, edumazet, pabeni, willemb, fw, Jakub Kicinski
This is a bit more crazy than the previous patch. For drivers
which already use build_skb() it's relatively easy to add more
space to the shinfo. Use this approach to place skb_ext inside
the head. No allocation needed.
This approach is a bit slower in trivial benchmarks than the recycling
because it requires extra cache line accesses (12.1% loss, ->18.6Gbps).
In-place skb_ext may be shorter than a full skb_ext object.
The driver only reserves space for exts it may use.
Any later addition will reallocate the space via CoW,
abandoning the in-place skb_ext and copying the data to
a full slab object.
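For illustration, a rough driver-side sketch of the intended usage
(hypothetical driver code; only napi_build_skb_ext() and
skb_ext_add_inplace() come from this patch):

/* Hypothetical RX path: the buffer was sized so that the last ext_size
 * bytes, right after shinfo, can hold struct skb_ext plus the
 * extensions the driver may attach.
 */
static struct sk_buff *mydrv_build_rx_skb(void *buf, unsigned int buf_size,
					  unsigned int headroom,
					  unsigned int pkt_len,
					  unsigned int ext_size)
{
	struct sk_buff *skb;
	struct sec_path *sp;

	skb = napi_build_skb_ext(buf, buf_size, ext_size);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, pkt_len);

	/* No allocation here; a later skb_ext_add() on a shared skb
	 * CoWs the in-place shard into a full slab object.
	 */
	sp = skb_ext_add_inplace(skb, SKB_EXT_SEC_PATH);
	if (sp)
		memset(sp, 0, sizeof(*sp));

	return skb;
}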
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
include/linux/skbuff.h | 29 +++++++-
net/core/gro.c | 1 +
net/core/skbuff.c | 156 +++++++++++++++++++++++++++++++++++++++--
3 files changed, 178 insertions(+), 8 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e68cb0a777b9..86486b667c94 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1257,6 +1257,8 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
void *data, unsigned int frag_size);
void skb_attempt_defer_free(struct sk_buff *skb);
+struct sk_buff *napi_build_skb_ext(void *data, unsigned int frag_size,
+ unsigned int ext_size);
struct sk_buff *napi_build_skb(void *data, unsigned int frag_size);
struct sk_buff *slab_build_skb(void *data);
@@ -4601,6 +4603,16 @@ enum skb_ext_id {
SKB_EXT_NUM, /* must be last */
};
+enum skb_ext_ref {
+ /* Normal skb ext from the kmem_cache */
+ SKB_EXT_ALLOC_SLAB,
+ /* Shard is a read-only skb ext placed after shinfo in the head,
+ * shards may be shorter than full skb_ext length!
+ */
+ SKB_EXT_ALLOC_SHARD_NOREF,
+ SKB_EXT_ALLOC_SHARD_REF,
+};
+
/**
* struct skb_ext - sk_buff extensions
* @refcnt: 1 on allocation, deallocated on 0
@@ -4613,6 +4625,7 @@ enum skb_ext_id {
*/
struct skb_ext {
refcount_t refcnt;
+ u8 alloc_type;
u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */
u8 chunks; /* same */
char data[] __aligned(8);
@@ -4623,8 +4636,10 @@ void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
struct skb_ext *ext);
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void *napi_skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
+void *skb_ext_add_inplace(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);
+void __skb_ext_copy_get(struct skb_ext *ext);
static inline void skb_ext_put(struct sk_buff *skb)
{
@@ -4640,7 +4655,7 @@ static inline void __skb_ext_copy(struct sk_buff *dst,
if (src->active_extensions) {
struct skb_ext *ext = src->extensions;
- refcount_inc(&ext->refcnt);
+ __skb_ext_copy_get(ext);
dst->extensions = ext;
}
}
@@ -4699,6 +4714,18 @@ static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
static inline bool skb_has_extensions(struct sk_buff *skb) { return false; }
#endif /* CONFIG_SKB_EXTENSIONS */
+/* Head got merged into another skb, skb itself will be freed later by
+ * kfree_skb_partial() or napi_skb_free_stolen_head().
+ */
+static inline void skb_head_stolen(struct sk_buff *skb)
+{
+#ifdef CONFIG_SKB_EXTENSIONS
+ if (unlikely(skb->active_extensions) &&
+ skb->extensions->alloc_type == SKB_EXT_ALLOC_SHARD_NOREF)
+ skb->active_extensions = 0;
+#endif
+}
+
static inline void nf_reset_ct(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
diff --git a/net/core/gro.c b/net/core/gro.c
index a606705a0859..f1e091fa0d4c 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -250,6 +250,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
delta_truesize = skb->truesize - new_truesize;
skb->truesize = new_truesize;
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
+ skb_head_stolen(skb);
goto done;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index feb5034b13ad..19e16d7533ac 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -128,6 +128,8 @@ const char * const drop_reasons[] = {
};
EXPORT_SYMBOL(drop_reasons);
+static int skb_ext_evacuate_head(struct sk_buff *skb);
+
/**
* skb_panic - private function for out-of-line support
* @skb: buffer
@@ -502,6 +504,35 @@ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(napi_build_skb);
+/**
+ * napi_build_skb_ext - build a network buffer with in-place skb_ext
+ * @data: data buffer provided by caller
+ * @frag_size: size of data
+ * @ext_size: size of space to reserve for skb_ext
+ *
+ * Version of napi_build_skb() that reserves space in the head for
+ * struct skb_ext. If @ext_size is zero or skb_ext_add_inplace()
+ * is never called there should be no performance loss compared
+ * to napi_build_skb().
+ *
+ * Returns a new &sk_buff on success, %NULL on allocation failure.
+ */
+struct sk_buff *
+napi_build_skb_ext(void *data, unsigned int frag_size, unsigned int ext_size)
+{
+ struct sk_buff *skb;
+
+ skb = __napi_build_skb(data, frag_size - ext_size);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb->head_frag = 1;
+ skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
+
+ return skb;
+}
+EXPORT_SYMBOL(napi_build_skb_ext);
+
/*
* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
* the caller if emergency pfmemalloc reserves are being used. If it is and
@@ -1257,7 +1288,9 @@ static void napi_skb_ext_put(struct sk_buff *skb)
if (!skb_ext_needs_destruct(ext)) {
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- if (refcount_read(&ext->refcnt) == 1 && !nc->ext) {
+ if (refcount_read(&ext->refcnt) == 1 &&
+ ext->alloc_type == SKB_EXT_ALLOC_SLAB &&
+ !nc->ext) {
kasan_poison_object_data(skbuff_ext_cache, ext);
nc->ext = ext;
return;
@@ -2028,6 +2061,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
+ if (skb_ext_evacuate_head(skb))
+ return -ENOMEM;
+
data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
goto nodata;
@@ -5666,6 +5702,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
+ skb_head_stolen(from);
*fragstolen = true;
} else {
if (to_shinfo->nr_frags +
@@ -6389,6 +6426,9 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
+ if (skb_ext_evacuate_head(skb))
+ return -ENOMEM;
+
data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;
@@ -6505,6 +6545,9 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
+ if (skb_ext_evacuate_head(skb))
+ return -ENOMEM;
+
data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;
@@ -6639,10 +6682,11 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
}
-static void skb_ext_init(struct skb_ext *new)
+static void skb_ext_init_slab(struct skb_ext *new)
{
memset(new->offset, 0, sizeof(new->offset));
refcount_set(&new->refcnt, 1);
+ new->alloc_type = SKB_EXT_ALLOC_SLAB;
}
/**
@@ -6659,7 +6703,7 @@ struct skb_ext *__skb_ext_alloc(gfp_t flags)
struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
if (new)
- skb_ext_init(new);
+ skb_ext_init_slab(new);
return new;
}
@@ -6669,7 +6713,8 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
{
struct skb_ext *new;
- if (refcount_read(&old->refcnt) == 1)
+ if (refcount_read(&old->refcnt) == 1 &&
+ old->alloc_type == SKB_EXT_ALLOC_SLAB)
return old;
new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
@@ -6678,6 +6723,7 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
refcount_set(&new->refcnt, 1);
+ new->alloc_type = SKB_EXT_ALLOC_SLAB;
#ifdef CONFIG_XFRM
if (old_active & (1 << SKB_EXT_SEC_PATH)) {
@@ -6793,13 +6839,41 @@ void *napi_skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
return NULL;
}
- skb_ext_init(new);
+ skb_ext_init_slab(new);
}
return skb_ext_add_finalize(skb, id, new);
}
EXPORT_SYMBOL(napi_skb_ext_add);
+/**
+ * skb_ext_add_inplace - allocate ext space in napi_build_skb_ext() skb
+ * @skb: buffer
+ * @id: extension to allocate space for
+ *
+ * Creates an extension in a private skb allocated with napi_build_skb_ext().
+ * The caller must have allocated the @skb and must guarantee that there
+ * will be enough space for all the extensions.
+ *
+ * Returns pointer to the extension or NULL on failure.
+ */
+void *skb_ext_add_inplace(struct sk_buff *skb, enum skb_ext_id id)
+{
+ struct skb_ext *new = NULL;
+
+ if (!skb->active_extensions) {
+ new = (void *)&skb_shinfo(skb)[1];
+ skb->extensions = new;
+
+ memset(new->offset, 0, sizeof(new->offset));
+ refcount_set(&new->refcnt, 1);
+ new->alloc_type = SKB_EXT_ALLOC_SHARD_NOREF;
+ }
+
+ return skb_ext_add_finalize(skb, id, new);
+}
+EXPORT_SYMBOL(skb_ext_add_inplace);
+
#ifdef CONFIG_XFRM
static void skb_ext_put_sp(struct sec_path *sp)
{
@@ -6846,8 +6920,11 @@ void __skb_ext_put(struct skb_ext *ext)
if (refcount_read(&ext->refcnt) == 1)
goto free_now;
- if (!refcount_dec_and_test(&ext->refcnt))
+ if (!refcount_dec_and_test(&ext->refcnt)) {
+ if (ext->alloc_type == SKB_EXT_ALLOC_SHARD_REF)
+ goto free_shard;
return;
+ }
free_now:
#ifdef CONFIG_XFRM
if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
@@ -6858,9 +6935,74 @@ void __skb_ext_put(struct skb_ext *ext)
skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP));
#endif
- kmem_cache_free(skbuff_ext_cache, ext);
+ switch (ext->alloc_type) {
+ case SKB_EXT_ALLOC_SLAB:
+ kmem_cache_free(skbuff_ext_cache, ext);
+ break;
+ case SKB_EXT_ALLOC_SHARD_NOREF:
+ break;
+ case SKB_EXT_ALLOC_SHARD_REF:
+free_shard:
+ skb_free_frag(ext);
+ break;
+ }
}
EXPORT_SYMBOL(__skb_ext_put);
+
+/* Only safe to use as part of a copy operation */
+void __skb_ext_copy_get(struct skb_ext *ext)
+{
+ struct page *head_page;
+ unsigned int type;
+ int old;
+
+ __refcount_inc(&ext->refcnt, &old);
+
+ type = READ_ONCE(ext->alloc_type);
+ if (type == SKB_EXT_ALLOC_SLAB)
+ return;
+
+ head_page = virt_to_head_page(ext);
+ get_page(head_page);
+
+ /* First reference to a shard does not hold a reference to
+ * the underlying page, take it now. This function can only
+ * be called during copy, so caller has a reference on ext,
+ * we just took the second one - there is no risk that two
+ * callers will race to do this upgrade.
+ */
+ if (type == SKB_EXT_ALLOC_SHARD_NOREF && old == 1) {
+ get_page(head_page);
+ WRITE_ONCE(ext->alloc_type, SKB_EXT_ALLOC_SHARD_REF);
+ }
+}
+EXPORT_SYMBOL(__skb_ext_copy_get);
+
+static int skb_ext_evacuate_head(struct sk_buff *skb)
+{
+ struct skb_ext *old, *new;
+
+ if (likely(!skb->active_extensions))
+ return 0;
+ old = skb->extensions;
+ if (old->alloc_type != SKB_EXT_ALLOC_SHARD_NOREF)
+ return 0;
+
+ WARN_ON_ONCE(!skb->head_frag);
+ new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
+ if (!new)
+ return -ENOMEM;
+
+ memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
+ WARN_ON_ONCE(refcount_read(&old->refcnt) != 1);
+ refcount_set(&new->refcnt, 1);
+ new->alloc_type = SKB_EXT_ALLOC_SLAB;
+
+ skb->extensions = new;
+ /* We're dealing with NOREF and we copied contents, so no freeing */
+
+ return 0;
+}
#endif /* CONFIG_SKB_EXTENSIONS */
/**
--
2.39.1
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 3:44 [RFC] net: skbuff: let struct skb_ext live inside the head Jakub Kicinski
@ 2023-02-15 8:53 ` Paolo Abeni
2023-02-15 17:58 ` Jakub Kicinski
2023-02-15 9:43 ` Florian Westphal
1 sibling, 1 reply; 8+ messages in thread
From: Paolo Abeni @ 2023-02-15 8:53 UTC (permalink / raw)
To: Jakub Kicinski, davem; +Cc: netdev, edumazet, willemb, fw
On Tue, 2023-02-14 at 19:44 -0800, Jakub Kicinski wrote:
> This is a bit more crazy than the previous patch. For drivers
> which already use build_skb() it's relatively easy to add more
> space to the shinfo. Use this approach to place skb_ext inside
> the head. No allocation needed.
>
> This approach is a bit slower in trivial benchmarks than the recycling
> because it requires extra cache line accesses (12.1% loss, ->18.6Gbps).
>
> In-place skb_ext may be shorter than a full skb_ext object.
> The driver only reserves space for exts it may use.
> Any later addition will reallocate the space via CoW,
> abandoning the in-place skb_ext and copying the data to
> a full slab object.
I personally like the other option (the non-RFC series) more. It looks
like this one requires quite a bit more code churn, which could be
error-prone, and even uninterested parties/callers/code-paths may end up
paying some extra overhead/additional cycles.
Still, if you are willing to experiment more with this idea, I think you
could save the extra cacheline miss by encoding the 'alloc_type' into the
lower bits of skb->extensions (similar to what _skb_refdst does with the
SKB_DST_NOREF flag).
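For illustration, a minimal sketch of that kind of pointer tagging
(made-up helper names, not part of the patch; struct skb_ext is 8-byte
aligned, so the low bits of skb->extensions are otherwise unused):

#define SKB_EXT_TYPE_MASK	0x3UL	/* low bits carry the alloc type */

static inline struct skb_ext *skb_ext_ptr(const struct sk_buff *skb)
{
	return (struct skb_ext *)
		((unsigned long)skb->extensions & ~SKB_EXT_TYPE_MASK);
}

static inline unsigned int skb_ext_alloc_type(const struct sk_buff *skb)
{
	return (unsigned long)skb->extensions & SKB_EXT_TYPE_MASK;
}

static inline void skb_ext_assign(struct sk_buff *skb, struct skb_ext *ext,
				  unsigned int alloc_type)
{
	skb->extensions = (struct skb_ext *)((unsigned long)ext | alloc_type);
}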
Cheers,
Paolo
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 8:53 ` Paolo Abeni
@ 2023-02-15 17:58 ` Jakub Kicinski
0 siblings, 0 replies; 8+ messages in thread
From: Jakub Kicinski @ 2023-02-15 17:58 UTC (permalink / raw)
To: Paolo Abeni; +Cc: davem, netdev, edumazet, willemb, fw
On Wed, 15 Feb 2023 09:53:54 +0100 Paolo Abeni wrote:
> Still, if you are willing to experiment more with this idea, I think you
> could save the extra cacheline miss by encoding the 'alloc_type' into the
> lower bits of skb->extensions (similar to what _skb_refdst does with the
> SKB_DST_NOREF flag).
I thought I'd start with a simpler approach where the allocation type
is stored in the object itself, to limit the negative reactions :P
We could indeed save a cache miss (I think it'd actually be one fewer
miss than the current implementation, because most cases must end
up looking at the skb_ext, e.g. to read the refcount; the fact that
SHARD_NOREF implies refcnt == 1 can save us reading from skb_ext).
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 3:44 [RFC] net: skbuff: let struct skb_ext live inside the head Jakub Kicinski
2023-02-15 8:53 ` Paolo Abeni
@ 2023-02-15 9:43 ` Florian Westphal
2023-02-15 14:37 ` Willem de Bruijn
2023-02-15 18:13 ` Jakub Kicinski
1 sibling, 2 replies; 8+ messages in thread
From: Florian Westphal @ 2023-02-15 9:43 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: davem, netdev, edumazet, pabeni, willemb, fw
Jakub Kicinski <kuba@kernel.org> wrote:
> This is a bit more crazy than the previous patch. For drivers
> which already use build_skb() it's relatively easy to add more
> space to the shinfo. Use this approach to place skb_ext inside
> the head. No allocation needed.
>
> This approach is a bit slower in trivial benchmarks than the recycling
> because it requires extra cache line accesses (12.1% loss, ->18.6Gbps).
>
> In-place skb_ext may be shorter than a full skb_ext object.
> The driver only reserves space for exts it may use.
> Any later addition will reallocate the space via CoW,
> abandoning the in-place skb_ext and copying the data to
> a full slab object.
I think the cleaner solution would be to move the new extension ids
into sk_buff itself (at the end, uninitialized data unless used).
Those extensions would always reside there and not in the slab object.
Obviously that only makes sense for extensions where we assume
that typical workload will require them, which might be a hard call to
make.
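For concreteness, a rough sketch of what such always-present fields
could look like (hypothetical names, purely illustrative):

/* Hypothetical block appended to struct sk_buff; contents are only
 * valid for the extensions whose bit is set in 'active'.
 */
struct skb_inline_exts {
	u8	active;		/* bitmask of valid inline extensions */
	u8	psp_version;	/* example: PSP rx metadata */
	u16	psp_generation;
	u32	psp_spi;
};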
I concur with Paolo that the napi-caching is nicer/less intrusive;
I think we have to wait and see whether it helps with psp (async crypto
needed?) when it lands.
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 9:43 ` Florian Westphal
@ 2023-02-15 14:37 ` Willem de Bruijn
2023-02-15 18:10 ` Jakub Kicinski
2023-02-15 18:13 ` Jakub Kicinski
1 sibling, 1 reply; 8+ messages in thread
From: Willem de Bruijn @ 2023-02-15 14:37 UTC (permalink / raw)
To: Florian Westphal; +Cc: Jakub Kicinski, davem, netdev, edumazet, pabeni
On Wed, Feb 15, 2023 at 4:43 AM Florian Westphal <fw@strlen.de> wrote:
>
> Jakub Kicinski <kuba@kernel.org> wrote:
> > This is a bit more crazy than the previous patch. For drivers
> > which already use build_skb() it's relatively easy to add more
> > space to the shinfo. Use this approach to place skb_ext inside
> > the head. No allocation needed.
> >
> > This approach is a bit slower in trivial benchmarks than the recycling
> > because it requires extra cache line accesses (12.1% loss, ->18.6Gbps).
> >
> > In-place skb_ext may be shorter than a full skb_ext object.
> > The driver only reserves space for exts it may use.
> > Any later addition will reallocate the space via CoW,
> > abandoning the in-place skb_ext and copying the data to
> > a full slab object.
>
> I think the cleaner solution would be to move the new extension ids
> into sk_buff itself (at the end, uninitialized data unless used).
Grow struct sk_buff?
> Those extensions would always reside there and not in the slab object.
> Obviously that only makes sense for extensions where we assume
> that typical workload will require them, which might be a hard call to
> make.
>
> I concur with Paolo that the napi-caching is nicer/less intrusive,
> I think we have to wait and see if it helps with psp (async crypto
> needed?) when it lands.
How much data does psp need? The google version [1] embeds struct
psp_skb, which may include a 256b key. If on tx the key is looked up
from skb->sk, then on rx the only truly required field is the 32-bit
SPI, to match a decrypted packet's session key to the socket. With a
pointer hack on the lowest bits of skb->extensions, such a tiny
extension could perhaps be embedded in the pointer field itself.
[1] https://github.com/google/psp/blob/linux-v5.15-psp-v1.0/include/net/psp_defs.h
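For illustration only, a sketch of such a pointer hack (made-up helpers,
64-bit only, not proposed code):

#define SKB_EXT_INLINE_SPI_BIT	0x1UL	/* inline value, not a pointer */

static inline void skb_ext_set_inline_spi(struct sk_buff *skb, u32 spi)
{
	/* stash the 32-bit SPI in the upper half of the pointer word */
	skb->extensions = (struct skb_ext *)
		(((unsigned long)spi << 32) | SKB_EXT_INLINE_SPI_BIT);
}

static inline bool skb_ext_has_inline_spi(const struct sk_buff *skb)
{
	return (unsigned long)skb->extensions & SKB_EXT_INLINE_SPI_BIT;
}

static inline u32 skb_ext_inline_spi(const struct sk_buff *skb)
{
	return (unsigned long)skb->extensions >> 32;
}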
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 14:37 ` Willem de Bruijn
@ 2023-02-15 18:10 ` Jakub Kicinski
0 siblings, 0 replies; 8+ messages in thread
From: Jakub Kicinski @ 2023-02-15 18:10 UTC (permalink / raw)
To: Willem de Bruijn; +Cc: Florian Westphal, davem, netdev, edumazet, pabeni
On Wed, 15 Feb 2023 09:37:09 -0500 Willem de Bruijn wrote:
> How much data does psp need? The google version [1] embeds struct
> psp_skb, which may include a 256b key. If on tx the key is looked up
> from skb->sk, then on rx the only truly required field is the 32-bit
> SPI, to match a decrypted packet's session key to the socket. With a
> pointer hack on the lowest bits of skb->extensions, such a tiny
> extension could perhaps be embedded in the pointer field itself.
>
> [1] https://github.com/google/psp/blob/linux-v5.15-psp-v1.0/include/net/psp_defs.h
So.. the most I could compress it to without sacrificing any security
was:
struct psp_skb_ext {
	u32 spi;
	u16 generation;
	u8 version;
};
I took the liberty of cutting the generation down to 16 bits. The version
is not necessary if we assume all machines and flows run a single
version. But then why is the auth-only version even in the spec :(
In any case, you're right that this would fit into the pointer with
some minor clever encoding. It felt even more hacky than extending shinfo
TBH.
I'd be curious to hear other opinions!
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 9:43 ` Florian Westphal
2023-02-15 14:37 ` Willem de Bruijn
@ 2023-02-15 18:13 ` Jakub Kicinski
2023-02-16 13:28 ` Florian Westphal
1 sibling, 1 reply; 8+ messages in thread
From: Jakub Kicinski @ 2023-02-15 18:13 UTC (permalink / raw)
To: Florian Westphal; +Cc: davem, netdev, edumazet, pabeni, willemb
On Wed, 15 Feb 2023 10:43:32 +0100 Florian Westphal wrote:
> I think the cleaner solution would be to move the new extension ids
> into sk_buff itself (at the end, uninitialized data unless used).
>
> Those extensions would always reside there and not in the slab object.
Do you mean the entire extension? 8B of metadata + (possibly) 32B
of the key?
> Obviously that only makes sense for extensions where we assume
> that typical workload will require them, which might be a hard call to
> make.
I'm guessing that's the reason why Google is okay with putting the key
in the skb - they know they will use it most of the time. But an
average RHEL user may appreciate the skb growth for an esoteric protocol
to a much smaller extent :(
* Re: [RFC] net: skbuff: let struct skb_ext live inside the head
2023-02-15 18:13 ` Jakub Kicinski
@ 2023-02-16 13:28 ` Florian Westphal
0 siblings, 0 replies; 8+ messages in thread
From: Florian Westphal @ 2023-02-16 13:28 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: Florian Westphal, davem, netdev, edumazet, pabeni, willemb
Jakub Kicinski <kuba@kernel.org> wrote:
> On Wed, 15 Feb 2023 10:43:32 +0100 Florian Westphal wrote:
> > I think the cleaner solution would be to move the new extension ids
> > into sk_buff itself (at the end, uninitialized data unless used).
> >
> > Those extensions would always reside there and not in the slab object.
>
> Do you mean the entire extension? 8B of metadata + (possibly) 32B
> of the key?
32B is too much if it's for something esoteric, but see below.
> > Obviously that only makes sense for extensions where we assume
> > that typical workload will require them, which might be a hard call to
> > make.
>
> I'm guessing that's the reason why Google is okay with putting the key
> in the skb - they know they will use it most of the time. But an
> average RHEL user may appreciate the skb growth for an esoteric protocol
> to a much smaller extent :(
Absolutely, I agree that it's a non-starter to place this in sk_buff
itself. The TX side is less of a problem here because of superpackets.
For RX I think your simpler napi-recycle patch is a good start.
I feel it's better to wait before doing anything further in this
direction (e.g. array-of-cached extensions or whatever) until we have
a better test case/more realistic workload(s).
If we need to look at further allocation avoidance, one thing that
could be evaluated would be placing an extension struct into
sk_buff_fclones (unioned with the fclone skb).
The fclone skb is marked busy; releasing the extension clears it again.
Just something to keep in mind for later. The only downside I see is that
we can't release the extension area anymore before the skb gets queued.
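Roughly, something like this (sketch only, not part of the patch):

/* Union the clone slot of sk_buff_fclones with extension storage: the
 * area is busy while the fclone skb is in use; releasing the extension
 * would mark it free again.
 */
struct sk_buff_fclones {
	struct sk_buff	skb1;
	union {
		struct sk_buff	skb2;
		u8		ext_space[sizeof(struct sk_buff)]
					__aligned(8);	/* holds a struct skb_ext */
	};
	refcount_t	fclone_ref;
};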