* [PATCH] net: add destructor for skb data (rewritten)
@ 2008-04-18 4:21 Rusty Russell
2008-04-19 9:35 ` David Miller
2008-04-19 9:46 ` Evgeniy Polyakov
0 siblings, 2 replies; 5+ messages in thread
From: Rusty Russell @ 2008-04-18 4:21 UTC (permalink / raw)
To: netdev; +Cc: Max Krasnyansky, Herbert Xu, David Miller
If we want to notify something when an skb is truly finished (such as
for tun vringfd support), we need a destructor on the data.
This turns out to be slightly non-trivial as fragments from one skb
get copied to another skb: if the first skb has a destructor (or its
parent does) we need to keep a reference to it and destroy it only
when (all the) children are destroyed. We add an 'orig' pointer to
the skb_shared_info to do this.
But there's currently no way to get from the shinfo to the head (to
kfree it), so we add a 'len' field. A better alternative to this
might be to move the skb_shared_info to before the head of the skb data.
Note that the destructor is responsible for calling kfree: for the tun
device, this is critical since the destructor can be called from any
context and it has to do a copy_to_user, so it queues the skb.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
include/linux/skbuff.h | 9 +++++++
net/core/skbuff.c | 62 ++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 60 insertions(+), 11 deletions(-)
diff -r 78decd088c5d include/linux/skbuff.h
--- a/include/linux/skbuff.h Thu Apr 17 07:20:31 2008 +1000
+++ b/include/linux/skbuff.h Fri Apr 18 10:33:19 2008 +1000
@@ -146,8 +146,12 @@ struct skb_shared_info {
unsigned short gso_segs;
unsigned short gso_type;
__be32 ip6_frag_id;
+ unsigned int len; /* Subtract from this shinfo to find skb->head */
struct sk_buff *frag_list;
skb_frag_t frags[MAX_SKB_FRAGS];
+ struct skb_shared_info *orig;
+ /* This is responsible for kfree() of header. */
+ void (*destructor)(struct skb_shared_info *);
};
/* We divide dataref into two halves. The higher 16 bits hold references
@@ -852,6 +856,11 @@ static inline void skb_fill_page_desc(st
#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list)
#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
+static inline unsigned char *skb_shinfo_to_head(struct skb_shared_info *shinfo)
+{
+ return (unsigned char *)shinfo - shinfo->len;
+}
+
#ifdef NET_SKBUFF_DATA_USES_OFFSET
static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
{
diff -r 78decd088c5d net/core/skbuff.c
--- a/net/core/skbuff.c Thu Apr 17 07:20:31 2008 +1000
+++ b/net/core/skbuff.c Fri Apr 18 10:33:19 2008 +1000
@@ -218,6 +218,9 @@ struct sk_buff *__alloc_skb(unsigned int
shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
shinfo->frag_list = NULL;
+ shinfo->destructor = NULL;
+ shinfo->orig = NULL;
+ shinfo->len = skb_end_pointer(skb) - skb->head;
if (fclone) {
struct sk_buff *child = skb + 1;
@@ -289,21 +292,50 @@ static void skb_clone_fraglist(struct sk
skb_get(list);
}
+static void shinfo_put(struct skb_shared_info *shinfo, bool nohdr)
+{
+ struct skb_shared_info *orig;
+
+ do {
+ if (atomic_sub_return(nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+ &shinfo->dataref))
+ return;
+
+ if (shinfo->nr_frags) {
+ int i;
+ for (i = 0; i < shinfo->nr_frags; i++)
+ put_page(shinfo->frags[i].page);
+ }
+
+ if (shinfo->frag_list)
+ skb_drop_list(&shinfo->frag_list);
+
+ orig = shinfo->orig;
+ if (shinfo->destructor)
+ shinfo->destructor(shinfo);
+ else
+ kfree(skb_shinfo_to_head(shinfo));
+
+ /* We hold a payload reference to our parent. */
+ nohdr = true;
+ } while ((shinfo = orig) != NULL);
+}
+
static void skb_release_data(struct sk_buff *skb)
{
- if (!skb->cloned ||
- !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &skb_shinfo(skb)->dataref)) {
- if (skb_shinfo(skb)->nr_frags) {
- int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
- }
+ if (!skb->cloned)
+ shinfo_put(skb_shinfo(skb), skb->nohdr);
+}
- if (skb_shinfo(skb)->frag_list)
- skb_drop_fraglist(skb);
+/* Now hold reference to older data, if has a destructor (recursively). */
+static void skb_ref_data_parent(struct sk_buff *parent,
+ struct skb_shared_info *shinfo)
+{
+ struct skb_shared_info *pshinfo = skb_shinfo(parent);
- kfree(skb->head);
+ if (pshinfo->destructor || pshinfo->orig) {
+ shinfo->orig = pshinfo;
+ atomic_add((1 << SKB_DATAREF_SHIFT) + 1, &pshinfo->dataref);
}
}
@@ -634,6 +666,7 @@ struct sk_buff *pskb_copy(struct sk_buff
get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
+ skb_ref_data_parent(skb, skb_shinfo(n));
}
if (skb_shinfo(skb)->frag_list) {
@@ -699,6 +732,8 @@ int pskb_expand_head(struct sk_buff *skb
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
+ skb_ref_data_parent(skb, (void *)(data + size));
+
skb_release_data(skb);
off = (data + nhead) - skb->head;
@@ -721,6 +756,7 @@ int pskb_expand_head(struct sk_buff *skb
skb->hdr_len = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_shinfo(skb)->len = skb_end_pointer(skb) - skb->head;
return 0;
nodata:
@@ -1868,6 +1904,8 @@ void skb_split(struct sk_buff *skb, stru
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
skb_split_no_header(skb, skb1, len, pos);
+
+ skb_ref_data_parent(skb, skb_shinfo(skb1));
}
/**
@@ -2240,6 +2278,8 @@ struct sk_buff *skb_segment(struct sk_bu
nskb->data_len = len - hsize;
nskb->len += nskb->data_len;
nskb->truesize += nskb->data_len;
+
+ skb_ref_data_parent(skb, skb_shinfo(nskb));
} while ((offset += len) < skb->len);
return segs;
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] net: add destructor for skb data (rewritten)
2008-04-18 4:21 [PATCH] net: add destructor for skb data (rewritten) Rusty Russell
@ 2008-04-19 9:35 ` David Miller
2008-04-19 16:20 ` Rusty Russell
2008-04-19 9:46 ` Evgeniy Polyakov
1 sibling, 1 reply; 5+ messages in thread
From: David Miller @ 2008-04-19 9:35 UTC (permalink / raw)
To: rusty; +Cc: netdev, maxk, herbert
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 18 Apr 2008 14:21:25 +1000
> If we want to notify something when an skb is truly finished (such as
> for tun vringfd support), we need a destructor on the data.
>
> This turns out to be slightly non-trivial as fragments from one skb
> get copied to another skb: if the first skb has a destructor (or its
> parent does) we need to keep a reference to it and destroy it only
> when (all the) children are destroyed. We add an 'orig' pointer to
> the skb_shared_info to do this.
>
> But there's currently no way to get from the shinfo to the head (to
> kfree it), so we add a 'len' field. A better alternative to this
> might be to move the skb_shared_info to before the head of the skb data.
>
> Note that the destructor is responsible for calling kfree: for the tun
> device, this is critical since the destructor can be called from any
> context and it has to do a copy_to_user, so it queues the skb.
>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
I'm mostly ambivalent but I will say I'm not happy about all of this
extra state you're adding even though it's "only" to the SKB data
shared-info struct and not sk_buff properly.
Does this handle SKB frags of arbitrary depth? SKB's can be nested to
arbitrary depths via the frag mechanism.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net: add destructor for skb data (rewritten)
2008-04-19 9:35 ` David Miller
@ 2008-04-19 16:20 ` Rusty Russell
0 siblings, 0 replies; 5+ messages in thread
From: Rusty Russell @ 2008-04-19 16:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev, maxk, herbert
On Saturday 19 April 2008 19:35:24 David Miller wrote:
> From: Rusty Russell <rusty@rustcorp.com.au>
> Date: Fri, 18 Apr 2008 14:21:25 +1000
>
> > If we want to notify something when an skb is truly finished (such as
> > for tun vringfd support), we need a destructor on the data.
> >
> > This turns out to be slightly non-trivial as fragments from one skb
> > get copied to another skb: if the first skb has a destructor (or its
> > parent does) we need to keep a reference to it and destroy it only
> > when (all the) children are destroyed. We add an 'orig' pointer to
> > the skb_shared_info to do this.
> >
> > But there's currently no way to get from the shinfo to the head (to
> > kfree it), so we add a 'len' field. A better alternative to this
> > might be to move the skb_shared_info to before the head of the skb data.
> >
> > Note that the destructor is responsible for calling kfree: for the tun
> > device, this is critical since the destructor can be called from any
> > context and it has to do a copy_to_user, so it queues the skb.
> >
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>
> I'm mostly ambivalent but I will say I'm not happy about all of this
> extra state you're adding even though it's "only" to the SKB data
> shared-info struct and not sk_buff properly.
Me neither. Moving the shared_info to the front of the data would reduce it
to two fields for me (removing len and destructor arg), but I held off for
now because this is a lesser change and possible for 2.6.26.
> Does this handle SKB frags of arbitrary depth? SKB's can be nested to
> arbitrary depths via the frag mechanism.
It doesn't matter in this case. It's the skb creator who sets the destructor,
and wants it called when all pages in shinfo->frags[] are done with. If it
wanted to also include the frag_list in this lifetime, it would simply
set ->orig on those skbs's shinfo to point back to the head shinfo, and
adjust dataref accordingly.
As long as the anyone referencing a frags page from an skb into another sets
the orig ptr & bumps dataref, this will work. If someone kept a reference to
a page and then freed the skb it game from, we're already broken.
You could think of it as a 'struct request' for networking. Or not.
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net: add destructor for skb data (rewritten)
2008-04-18 4:21 [PATCH] net: add destructor for skb data (rewritten) Rusty Russell
2008-04-19 9:35 ` David Miller
@ 2008-04-19 9:46 ` Evgeniy Polyakov
2008-04-19 15:49 ` Rusty Russell
1 sibling, 1 reply; 5+ messages in thread
From: Evgeniy Polyakov @ 2008-04-19 9:46 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, Max Krasnyansky, Herbert Xu, David Miller
On Fri, Apr 18, 2008 at 02:21:25PM +1000, Rusty Russell (rusty@rustcorp.com.au) wrote:
> If we want to notify something when an skb is truly finished (such as
> for tun vringfd support), we need a destructor on the data.
Can we put its invokation before pages are freed?
> +static void shinfo_put(struct skb_shared_info *shinfo, bool nohdr)
> +{
> + struct skb_shared_info *orig;
> +
> + do {
> + if (atomic_sub_return(nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
> + &shinfo->dataref))
> + return;
> +
> + if (shinfo->nr_frags) {
> + int i;
> + for (i = 0; i < shinfo->nr_frags; i++)
> + put_page(shinfo->frags[i].page);
> + }
> +
> + if (shinfo->frag_list)
> + skb_drop_list(&shinfo->frag_list);
> +
> + orig = shinfo->orig;
> + if (shinfo->destructor)
> + shinfo->destructor(shinfo);
If it is first, we can process handle pages inside destructor, otherwise
they can be out of our control.
> + else
> + kfree(skb_shinfo_to_head(shinfo));
> +
> + /* We hold a payload reference to our parent. */
> + nohdr = true;
> + } while ((shinfo = orig) != NULL);
> +}
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] net: add destructor for skb data (rewritten)
2008-04-19 9:46 ` Evgeniy Polyakov
@ 2008-04-19 15:49 ` Rusty Russell
0 siblings, 0 replies; 5+ messages in thread
From: Rusty Russell @ 2008-04-19 15:49 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: netdev, Max Krasnyansky, Herbert Xu, David Miller
On Saturday 19 April 2008 19:46:01 Evgeniy Polyakov wrote:
> On Fri, Apr 18, 2008 at 02:21:25PM +1000, Rusty Russell
(rusty@rustcorp.com.au) wrote:
> > If we want to notify something when an skb is truly finished (such as
> > for tun vringfd support), we need a destructor on the data.
>
> Can we put its invokation before pages are freed?
Sure. I don't need that, but it's easy to change.
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-04-19 16:20 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-18 4:21 [PATCH] net: add destructor for skb data (rewritten) Rusty Russell
2008-04-19 9:35 ` David Miller
2008-04-19 16:20 ` Rusty Russell
2008-04-19 9:46 ` Evgeniy Polyakov
2008-04-19 15:49 ` Rusty Russell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).