[PATCH v3] net: af_packet: don't call tpacket_destruct

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
@ 2010-09-23 10:15 Changli Gao
  2010-09-23 12:29 ` Eric Dumazet
  2010-09-24  6:36 ` Jarek Poplawski
  0 siblings, 2 replies; 13+ messages in thread
From: Changli Gao @ 2010-09-23 10:15 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric Dumazet, Oliver Hartkopp, Michael S. Tsirkin, netdev,
	Changli Gao

Since skb->destructor() is used to account socket memory, and maybe called
before the skb is sent out, a corrupt skb maybe sent out finally.

A new destructor is added into structure skb_shared_info(), and it won't
be called until the last reference to the data of an skb is put. af_packet
uses this destructor instead.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
    fix splice the skbs generated by AF_PACKET socket to the pipe.
v2: avoid kmalloc/kfree
 include/linux/skbuff.h |    7 ++++---
 net/core/skbuff.c      |   29 ++++++++++++++++++++---------
 net/packet/af_packet.c |   25 ++++++++++++-------------
 3 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9e8085a..0854135 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -191,15 +191,16 @@ struct skb_shared_info {
 	__u8		tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	void		(*data_destructor)(struct sk_buff *skb);
 
 	/*
 	 * Warning : all fields before dataref are cleared in __alloc_skb()
 	 */
 	atomic_t	dataref;
 
-	/* Intermediate layers must ensure that destructor_arg
-	 * remains valid until skb destructor */
-	void *		destructor_arg;
+	/* Intermediate layers must ensure that data_destructor_arg
+	 * remains valid until skb data destructor */
+	void		*data_destructor_arg[2];
 	/* must be last field, see pskb_expand_head() */
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 752c197..95a48fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -332,10 +332,14 @@ static void skb_release_data(struct sk_buff *skb)
 	if (!skb->cloned ||
 	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
 			       &skb_shinfo(skb)->dataref)) {
-		if (skb_shinfo(skb)->nr_frags) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+		if (shinfo->data_destructor)
+			shinfo->data_destructor(skb);
+		if (shinfo->nr_frags) {
 			int i;
-			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-				put_page(skb_shinfo(skb)->frags[i].page);
+			for (i = 0; i < shinfo->nr_frags; i++)
+				put_page(shinfo->frags[i].page);
 		}
 
 		if (skb_has_frag_list(skb))
@@ -497,9 +501,12 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 	if (skb_shared(skb) || skb_cloned(skb))
 		return false;
 
+	shinfo = skb_shinfo(skb);
+	if (shinfo->data_destructor)
+		return false;
+
 	skb_release_head_state(skb);
 
-	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
 
@@ -799,7 +806,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb),
-	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+	       offsetof(struct skb_shared_info,
+			frags[skb_shinfo(skb)->nr_frags]));
+	skb_shinfo(skb)->data_destructor = NULL;
 
 	/* Check if we can avoid taking references on fragments if we own
 	 * the last reference on skb->head. (see skb_release_data())
@@ -1408,7 +1417,7 @@ new_page:
 static inline int spd_fill_page(struct splice_pipe_desc *spd,
 				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
-				struct sk_buff *skb, int linear,
+				struct sk_buff *skb, bool linear,
 				struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == pipe->buffers))
@@ -1446,7 +1455,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
-				   struct splice_pipe_desc *spd, int linear,
+				   struct splice_pipe_desc *spd, bool linear,
 				   struct sock *sk,
 				   struct pipe_inode_info *pipe)
 {
@@ -1498,7 +1507,7 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk, pipe))
+			     offset, len, skb, spd, true, sk, pipe))
 		return 1;
 
 	/*
@@ -1508,7 +1517,9 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk, pipe))
+				     offset, len, skb, spd,
+				     skb_shinfo(skb)->data_destructor != NULL,
+				     sk, pipe))
 			return 1;
 	}
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27..ecf57c7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -825,19 +825,19 @@ ring_is_full:
 
 static void tpacket_destruct_skb(struct sk_buff *skb)
 {
-	struct packet_sock *po = pkt_sk(skb->sk);
-	void *ph;
-
-	BUG_ON(skb == NULL);
+	struct packet_sock *po;
 
+	po = pkt_sk(skb_shinfo(skb)->data_destructor_arg[0]);
 	if (likely(po->tx_ring.pg_vec)) {
-		ph = skb_shinfo(skb)->destructor_arg;
+		void *ph = skb_shinfo(skb)->data_destructor_arg[1];
+
 		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
 	}
 
+	skb->sk = &po->sk;
 	sock_wfree(skb);
 }
 
@@ -862,7 +862,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->dev = dev;
 	skb->priority = po->sk.sk_priority;
 	skb->mark = po->sk.sk_mark;
-	skb_shinfo(skb)->destructor_arg = ph.raw;
 
 	switch (po->tp_version) {
 	case TPACKET_V2:
@@ -884,9 +883,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	to_write = tp_len;
 
 	if (sock->type == SOCK_DGRAM) {
-		err = dev_hard_header(skb, dev, ntohs(proto), addr,
-				NULL, tp_len);
-		if (unlikely(err < 0))
+		if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
+					     NULL, tp_len) < 0))
 			return -EINVAL;
 	} else if (dev->hard_header_len) {
 		/* net device doesn't like empty head */
@@ -897,8 +895,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		}
 
 		skb_push(skb, dev->hard_header_len);
-		err = skb_store_bits(skb, 0, data,
-				dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data, dev->hard_header_len);
 		if (unlikely(err))
 			return err;
 
@@ -906,7 +903,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		to_write -= dev->hard_header_len;
 	}
 
-	err = -EFAULT;
 	page = virt_to_page(data);
 	offset = offset_in_page(data);
 	len_max = PAGE_SIZE - offset;
@@ -1028,7 +1024,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		skb->destructor = tpacket_destruct_skb;
+		skb_shinfo(skb)->data_destructor_arg[0] = &po->sk;
+		skb_shinfo(skb)->data_destructor_arg[1] = ph;
+		skb->destructor = NULL;
+		skb_shinfo(skb)->data_destructor = tpacket_destruct_skb;
 		__packet_set_status(po, ph, TP_STATUS_SENDING);
 		atomic_inc(&po->tx_ring.pending);
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-23 10:15 [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out Changli Gao
@ 2010-09-23 12:29 ` Eric Dumazet
  2010-09-23 14:17   ` Changli Gao
  2010-09-24  6:36 ` Jarek Poplawski
  1 sibling, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-09-23 12:29 UTC (permalink / raw)
  To: Changli Gao; +Cc: David S. Miller, Oliver Hartkopp, Michael S. Tsirkin, netdev

Le jeudi 23 septembre 2010 à 18:15 +0800, Changli Gao a écrit :
> Since skb->destructor() is used to account socket memory, and maybe called
> before the skb is sent out, a corrupt skb maybe sent out finally.
> 
> A new destructor is added into structure skb_shared_info(), and it won't
> be called until the last reference to the data of an skb is put. af_packet
> uses this destructor instead.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
> v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
>     fix splice the skbs generated by AF_PACKET socket to the pipe.

I dont understand this.

Could you describe how splice(from af_packet to pipe) is possible with
af_packet send path ?

Also, on such risky patch, could you please avoid inserting cleanups ?
I am referring to these bits :



@@ -884,9 +883,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
        to_write = tp_len;
 
        if (sock->type == SOCK_DGRAM) {
-               err = dev_hard_header(skb, dev, ntohs(proto), addr,
-                               NULL, tp_len);
-               if (unlikely(err < 0))
+               if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
+                                            NULL, tp_len) < 0))
                        return -EINVAL;
        } else if (dev->hard_header_len) {
                /* net device doesn't like empty head */
@@ -897,8 +895,7 @@ static int tpacket_fill_skb(struct packet_sock *po,
struct sk_buff *skb,
                }
 
                skb_push(skb, dev->hard_header_len);
-               err = skb_store_bits(skb, 0, data,
-                               dev->hard_header_len);
+               err = skb_store_bits(skb, 0, data, dev->hard_header_len);
                if (unlikely(err))
                        return err;
 
@@ -906,7 +903,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
                to_write -= dev->hard_header_len;
        }
 
-       err = -EFAULT;
        page = virt_to_page(data);
        offset = offset_in_page(data);
        len_max = PAGE_SIZE - offset;




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-23 12:29 ` Eric Dumazet
@ 2010-09-23 14:17   ` Changli Gao
  2010-09-23 14:41     ` Eric Dumazet
  0 siblings, 1 reply; 13+ messages in thread
From: Changli Gao @ 2010-09-23 14:17 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, Oliver Hartkopp, Michael S. Tsirkin, netdev

On Thu, Sep 23, 2010 at 8:29 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le jeudi 23 septembre 2010 à 18:15 +0800, Changli Gao a écrit :
>> Since skb->destructor() is used to account socket memory, and maybe called
>> before the skb is sent out, a corrupt skb maybe sent out finally.
>>
>> A new destructor is added into structure skb_shared_info(), and it won't
>> be called until the last reference to the data of an skb is put. af_packet
>> uses this destructor instead.
>>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ---
>> v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
>>     fix splice the skbs generated by AF_PACKET socket to the pipe.
>
> I dont understand this.
>
> Could you describe how splice(from af_packet to pipe) is possible with
> af_packet send path ?

af_packet sends packets to lo(127.0.0.1), and a local socket is
receiving packets via splice.

>
> Also, on such risky patch, could you please avoid inserting cleanups ?
> I am referring to these bits :
>

OK. Thanks.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-23 14:17   ` Changli Gao
@ 2010-09-23 14:41     ` Eric Dumazet
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-09-23 14:41 UTC (permalink / raw)
  To: Changli Gao; +Cc: David S. Miller, Oliver Hartkopp, Michael S. Tsirkin, netdev

Le jeudi 23 septembre 2010 à 22:17 +0800, Changli Gao a écrit :
> On Thu, Sep 23, 2010 at 8:29 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Le jeudi 23 septembre 2010 à 18:15 +0800, Changli Gao a écrit :
> >> Since skb->destructor() is used to account socket memory, and maybe called
> >> before the skb is sent out, a corrupt skb maybe sent out finally.
> >>
> >> A new destructor is added into structure skb_shared_info(), and it won't
> >> be called until the last reference to the data of an skb is put. af_packet
> >> uses this destructor instead.
> >>
> >> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> >> ---
> >> v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
> >>     fix splice the skbs generated by AF_PACKET socket to the pipe.
> >
> > I dont understand this.
> >
> > Could you describe how splice(from af_packet to pipe) is possible with
> > af_packet send path ?
> 
> af_packet sends packets to lo(127.0.0.1), and a local socket is
> receiving packets via splice.

Ouch

I am pretty sure too many things will break if we allow such packets to
get back in input path.

(think of tcp coalescing for example...)




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-23 10:15 [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out Changli Gao
  2010-09-23 12:29 ` Eric Dumazet
@ 2010-09-24  6:36 ` Jarek Poplawski
  2010-09-24  7:01   ` Eric Dumazet
                     ` (2 more replies)
  1 sibling, 3 replies; 13+ messages in thread
From: Jarek Poplawski @ 2010-09-24  6:36 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Eric Dumazet, Oliver Hartkopp,
	Michael S. Tsirkin, netdev

On 2010-09-23 12:15, Changli Gao wrote:
> Since skb->destructor() is used to account socket memory, and maybe called
> before the skb is sent out, a corrupt skb maybe sent out finally.
> 
> A new destructor is added into structure skb_shared_info(), and it won't
> be called until the last reference to the data of an skb is put. af_packet
> uses this destructor instead.

IMHO, we shouldn't allow for fixing the bad design of one protocol at
the expense of others by adding more and more conditionals. The proper
way of handling paged skbs (splice compatible) exists. And the current
patch doesn't even fix the problem completely against things like
pskb_expand_head or pskb_copy.

af_packet could check some flag which guarantees the queued dev can do
skb_orphan after the real xmit and copy buffers otherwise.

Jarek P.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-24  6:36 ` Jarek Poplawski
@ 2010-09-24  7:01   ` Eric Dumazet
  2010-09-27  1:25     ` David Miller
  2010-09-27  1:22   ` David Miller
  2010-09-27  1:24   ` Changli Gao
  2 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-09-24  7:01 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: Changli Gao, David S. Miller, Oliver Hartkopp, Michael S. Tsirkin,
	netdev

Le vendredi 24 septembre 2010 à 06:36 +0000, Jarek Poplawski a écrit :
> On 2010-09-23 12:15, Changli Gao wrote:
> > Since skb->destructor() is used to account socket memory, and maybe called
> > before the skb is sent out, a corrupt skb maybe sent out finally.
> > 
> > A new destructor is added into structure skb_shared_info(), and it won't
> > be called until the last reference to the data of an skb is put. af_packet
> > uses this destructor instead.
> 
> IMHO, we shouldn't allow for fixing the bad design of one protocol at
> the expense of others by adding more and more conditionals. The proper
> way of handling paged skbs (splice compatible) exists. And the current
> patch doesn't even fix the problem completely against things like
> pskb_expand_head or pskb_copy.
> 
> af_packet could check some flag which guarantees the queued dev can do
> skb_orphan after the real xmit and copy buffers otherwise.

Agreed.

af_packet (tx with mmap) is broken. I wonder who really uses it ?

To properly cope with paged skbs, it should not try to fit several
packets per page.

The mmap api should change so that one mmaped page belongs to at most
one skb, or else we need invasive changes in net/core

This probably makes this stuff less interesting, unless the need is to
send big packets. In this case, why splice was not used instead of
custom mmap ?



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-24  6:36 ` Jarek Poplawski
  2010-09-24  7:01   ` Eric Dumazet
@ 2010-09-27  1:22   ` David Miller
  2010-09-27  5:30     ` Jarek Poplawski
  2010-09-27  1:24   ` Changli Gao
  2 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2010-09-27  1:22 UTC (permalink / raw)
  To: jarkao2; +Cc: xiaosuo, eric.dumazet, socketcan, mst, netdev

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 24 Sep 2010 06:36:23 +0000

> af_packet could check some flag which guarantees the queued dev can do
> skb_orphan after the real xmit and copy buffers otherwise.

Jarek, we pre-orphan SKBs in the core way before device even gets
the packet.

So talking about device-specific situations where this could behave
differently has no sense.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-24  6:36 ` Jarek Poplawski
  2010-09-24  7:01   ` Eric Dumazet
  2010-09-27  1:22   ` David Miller
@ 2010-09-27  1:24   ` Changli Gao
  2010-09-27  5:46     ` Jarek Poplawski
  2 siblings, 1 reply; 13+ messages in thread
From: Changli Gao @ 2010-09-27  1:24 UTC (permalink / raw)
  To: Jarek Poplawski, xiaohui.xin
  Cc: David S. Miller, Eric Dumazet, Oliver Hartkopp,
	Michael S. Tsirkin, netdev

On Fri, Sep 24, 2010 at 2:36 PM, Jarek Poplawski <jarkao2@gmail.com> wrote:
> On 2010-09-23 12:15, Changli Gao wrote:
>> Since skb->destructor() is used to account socket memory, and maybe called
>> before the skb is sent out, a corrupt skb maybe sent out finally.
>>
>> A new destructor is added into structure skb_shared_info(), and it won't
>> be called until the last reference to the data of an skb is put. af_packet
>> uses this destructor instead.
>
> IMHO, we shouldn't allow for fixing the bad design of one protocol at
> the expense of others by adding more and more conditionals. The proper
> way of handling paged skbs (splice compatible) exists. And the current
> patch doesn't even fix the problem completely against things like
> pskb_expand_head or pskb_copy.

pskb_expand_head is handled in my patch, but not pskb_copy().

indeed, there are many issues to fix, and xiaohui's patch may have the
same issues. The proper way may put the destruct handler in pages
instead.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-24  7:01   ` Eric Dumazet
@ 2010-09-27  1:25     ` David Miller
  2010-09-27  5:40       ` Eric Dumazet
  0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2010-09-27  1:25 UTC (permalink / raw)
  To: eric.dumazet; +Cc: jarkao2, xiaosuo, socketcan, mst, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 24 Sep 2010 09:01:00 +0200

> af_packet (tx with mmap) is broken. I wonder who really uses it ?

I suspect now that af_packet supports VNET headers on transmit,
there are some things using this tx+mmap thing for sure.

> To properly cope with paged skbs, it should not try to fit several
> packets per page.
> 
> The mmap api should change so that one mmaped page belongs to at most
> one skb, or else we need invasive changes in net/core
> 
> This probably makes this stuff less interesting, unless the need is to
> send big packets. In this case, why splice was not used instead of
> custom mmap ?

I don't really see what the big issue is.

When the data destructor runs it means that packet's part of the pages
are available for reuse for the tx mmap client.  And if I read it
correctly, that's exactly what tpacket_destruct_skb() is in fact doing.

There seems to be no conflict with that rule and reusing a page for
multiple packets.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-27  1:22   ` David Miller
@ 2010-09-27  5:30     ` Jarek Poplawski
  2010-09-27  6:56       ` David Miller
  0 siblings, 1 reply; 13+ messages in thread
From: Jarek Poplawski @ 2010-09-27  5:30 UTC (permalink / raw)
  To: David Miller; +Cc: xiaosuo, eric.dumazet, socketcan, mst, netdev

On Sun, Sep 26, 2010 at 06:22:08PM -0700, David Miller wrote:
> From: Jarek Poplawski <jarkao2@gmail.com>
> Date: Fri, 24 Sep 2010 06:36:23 +0000
> 
> > af_packet could check some flag which guarantees the queued dev can do
> > skb_orphan after the real xmit and copy buffers otherwise.
> 
> Jarek, we pre-orphan SKBs in the core way before device even gets
> the packet.

I'm not sure which place in the core do you mean; skb_orphan_try() in
dev_hard_start_xmit() depends on ->tx_flags.

Jarek P.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-27  1:25     ` David Miller
@ 2010-09-27  5:40       ` Eric Dumazet
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-09-27  5:40 UTC (permalink / raw)
  To: David Miller; +Cc: jarkao2, xiaosuo, socketcan, mst, netdev

Le dimanche 26 septembre 2010 à 18:25 -0700, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 24 Sep 2010 09:01:00 +0200
> 
> > af_packet (tx with mmap) is broken. I wonder who really uses it ?
> 
> I suspect now that af_packet supports VNET headers on transmit,
> there are some things using this tx+mmap thing for sure.
> 
> > To properly cope with paged skbs, it should not try to fit several
> > packets per page.
> > 
> > The mmap api should change so that one mmaped page belongs to at most
> > one skb, or else we need invasive changes in net/core
> > 
> > This probably makes this stuff less interesting, unless the need is to
> > send big packets. In this case, why splice was not used instead of
> > custom mmap ?
> 
> I don't really see what the big issue is.
> 
> When the data destructor runs it means that packet's part of the pages
> are available for reuse for the tx mmap client.  And if I read it
> correctly, that's exactly what tpacket_destruct_skb() is in fact doing.
> 
> There seems to be no conflict with that rule and reusing a page for
> multiple packets.

I was wondering if somewhere we transfert a frag from one skb1 to
another skb2, and eventually free skb1.

I just check tcp collapse and found it was not coping with frags, yet.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-27  1:24   ` Changli Gao
@ 2010-09-27  5:46     ` Jarek Poplawski
  0 siblings, 0 replies; 13+ messages in thread
From: Jarek Poplawski @ 2010-09-27  5:46 UTC (permalink / raw)
  To: Changli Gao
  Cc: xiaohui.xin, David S. Miller, Eric Dumazet, Oliver Hartkopp,
	Michael S. Tsirkin, netdev

On Mon, Sep 27, 2010 at 09:24:02AM +0800, Changli Gao wrote:
> On Fri, Sep 24, 2010 at 2:36 PM, Jarek Poplawski <jarkao2@gmail.com> wrote:
> > On 2010-09-23 12:15, Changli Gao wrote:
> >> Since skb->destructor() is used to account socket memory, and maybe called
> >> before the skb is sent out, a corrupt skb maybe sent out finally.
> >>
> >> A new destructor is added into structure skb_shared_info(), and it won't
> >> be called until the last reference to the data of an skb is put. af_packet
> >> uses this destructor instead.
> >
> > IMHO, we shouldn't allow for fixing the bad design of one protocol at
> > the expense of others by adding more and more conditionals. The proper
> > way of handling paged skbs (splice compatible) exists. And the current
> > patch doesn't even fix the problem completely against things like
> > pskb_expand_head or pskb_copy.
> 
> pskb_expand_head is handled in my patch, but not pskb_copy().

It's not enough: "skb_shinfo(skb)->data_destructor = NULL" means
skb_release_data() for the original skb->data will not have one, and
you don't know which of the two releases will be the last.

Jarek P.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
  2010-09-27  5:30     ` Jarek Poplawski
@ 2010-09-27  6:56       ` David Miller
  0 siblings, 0 replies; 13+ messages in thread
From: David Miller @ 2010-09-27  6:56 UTC (permalink / raw)
  To: jarkao2; +Cc: xiaosuo, eric.dumazet, socketcan, mst, netdev

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 27 Sep 2010 05:30:47 +0000

> On Sun, Sep 26, 2010 at 06:22:08PM -0700, David Miller wrote:
>> From: Jarek Poplawski <jarkao2@gmail.com>
>> Date: Fri, 24 Sep 2010 06:36:23 +0000
>> 
>> > af_packet could check some flag which guarantees the queued dev can do
>> > skb_orphan after the real xmit and copy buffers otherwise.
>> 
>> Jarek, we pre-orphan SKBs in the core way before device even gets
>> the packet.
> 
> I'm not sure which place in the core do you mean; skb_orphan_try() in
> dev_hard_start_xmit() depends on ->tx_flags.

Right, I keep forgetting about that special check, thanks for
reminding me :)

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-09-27  6:56 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-23 10:15 [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out Changli Gao
2010-09-23 12:29 ` Eric Dumazet
2010-09-23 14:17   ` Changli Gao
2010-09-23 14:41     ` Eric Dumazet
2010-09-24  6:36 ` Jarek Poplawski
2010-09-24  7:01   ` Eric Dumazet
2010-09-27  1:25     ` David Miller
2010-09-27  5:40       ` Eric Dumazet
2010-09-27  1:22   ` David Miller
2010-09-27  5:30     ` Jarek Poplawski
2010-09-27  6:56       ` David Miller
2010-09-27  1:24   ` Changli Gao
2010-09-27  5:46     ` Jarek Poplawski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).