netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
@ 2010-09-23 10:15 Changli Gao
  2010-09-23 12:29 ` Eric Dumazet
  2010-09-24  6:36 ` Jarek Poplawski
  0 siblings, 2 replies; 13+ messages in thread
From: Changli Gao @ 2010-09-23 10:15 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric Dumazet, Oliver Hartkopp, Michael S. Tsirkin, netdev,
	Changli Gao

Since skb->destructor() is used to account socket memory, and may be called
before the skb is sent out, a corrupt skb may end up being sent out.

A new destructor is added to struct skb_shared_info, and it won't be
called until the last reference to the data of an skb is put. af_packet
uses this destructor instead.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
    fix splicing of the skbs generated by an AF_PACKET socket to a pipe.
v2: avoid kmalloc/kfree
 include/linux/skbuff.h |    7 ++++---
 net/core/skbuff.c      |   29 ++++++++++++++++++++---------
 net/packet/af_packet.c |   25 ++++++++++++-------------
 3 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9e8085a..0854135 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -191,15 +191,16 @@ struct skb_shared_info {
 	__u8		tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	void		(*data_destructor)(struct sk_buff *skb);
 
 	/*
 	 * Warning : all fields before dataref are cleared in __alloc_skb()
 	 */
 	atomic_t	dataref;
 
-	/* Intermediate layers must ensure that destructor_arg
-	 * remains valid until skb destructor */
-	void *		destructor_arg;
+	/* Intermediate layers must ensure that data_destructor_arg
+	 * remains valid until skb data destructor */
+	void		*data_destructor_arg[2];
 	/* must be last field, see pskb_expand_head() */
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 752c197..95a48fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -332,10 +332,14 @@ static void skb_release_data(struct sk_buff *skb)
 	if (!skb->cloned ||
 	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
 			       &skb_shinfo(skb)->dataref)) {
-		if (skb_shinfo(skb)->nr_frags) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+		if (shinfo->data_destructor)
+			shinfo->data_destructor(skb);
+		if (shinfo->nr_frags) {
 			int i;
-			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-				put_page(skb_shinfo(skb)->frags[i].page);
+			for (i = 0; i < shinfo->nr_frags; i++)
+				put_page(shinfo->frags[i].page);
 		}
 
 		if (skb_has_frag_list(skb))
@@ -497,9 +501,12 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 	if (skb_shared(skb) || skb_cloned(skb))
 		return false;
 
+	shinfo = skb_shinfo(skb);
+	if (shinfo->data_destructor)
+		return false;
+
 	skb_release_head_state(skb);
 
-	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
 
@@ -799,7 +806,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb),
-	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+	       offsetof(struct skb_shared_info,
+			frags[skb_shinfo(skb)->nr_frags]));
+	skb_shinfo(skb)->data_destructor = NULL;
 
 	/* Check if we can avoid taking references on fragments if we own
 	 * the last reference on skb->head. (see skb_release_data())
@@ -1408,7 +1417,7 @@ new_page:
 static inline int spd_fill_page(struct splice_pipe_desc *spd,
 				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
-				struct sk_buff *skb, int linear,
+				struct sk_buff *skb, bool linear,
 				struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == pipe->buffers))
@@ -1446,7 +1455,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
-				   struct splice_pipe_desc *spd, int linear,
+				   struct splice_pipe_desc *spd, bool linear,
 				   struct sock *sk,
 				   struct pipe_inode_info *pipe)
 {
@@ -1498,7 +1507,7 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk, pipe))
+			     offset, len, skb, spd, true, sk, pipe))
 		return 1;
 
 	/*
@@ -1508,7 +1517,9 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk, pipe))
+				     offset, len, skb, spd,
+				     skb_shinfo(skb)->data_destructor != NULL,
+				     sk, pipe))
 			return 1;
 	}
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27..ecf57c7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -825,19 +825,19 @@ ring_is_full:
 
 static void tpacket_destruct_skb(struct sk_buff *skb)
 {
-	struct packet_sock *po = pkt_sk(skb->sk);
-	void *ph;
-
-	BUG_ON(skb == NULL);
+	struct packet_sock *po;
 
+	po = pkt_sk(skb_shinfo(skb)->data_destructor_arg[0]);
 	if (likely(po->tx_ring.pg_vec)) {
-		ph = skb_shinfo(skb)->destructor_arg;
+		void *ph = skb_shinfo(skb)->data_destructor_arg[1];
+
 		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
 	}
 
+	skb->sk = &po->sk;
 	sock_wfree(skb);
 }
 
@@ -862,7 +862,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->dev = dev;
 	skb->priority = po->sk.sk_priority;
 	skb->mark = po->sk.sk_mark;
-	skb_shinfo(skb)->destructor_arg = ph.raw;
 
 	switch (po->tp_version) {
 	case TPACKET_V2:
@@ -884,9 +883,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	to_write = tp_len;
 
 	if (sock->type == SOCK_DGRAM) {
-		err = dev_hard_header(skb, dev, ntohs(proto), addr,
-				NULL, tp_len);
-		if (unlikely(err < 0))
+		if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
+					     NULL, tp_len) < 0))
 			return -EINVAL;
 	} else if (dev->hard_header_len) {
 		/* net device doesn't like empty head */
@@ -897,8 +895,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		}
 
 		skb_push(skb, dev->hard_header_len);
-		err = skb_store_bits(skb, 0, data,
-				dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data, dev->hard_header_len);
 		if (unlikely(err))
 			return err;
 
@@ -906,7 +903,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		to_write -= dev->hard_header_len;
 	}
 
-	err = -EFAULT;
 	page = virt_to_page(data);
 	offset = offset_in_page(data);
 	len_max = PAGE_SIZE - offset;
@@ -1028,7 +1024,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		skb->destructor = tpacket_destruct_skb;
+		skb_shinfo(skb)->data_destructor_arg[0] = &po->sk;
+		skb_shinfo(skb)->data_destructor_arg[1] = ph;
+		skb->destructor = NULL;
+		skb_shinfo(skb)->data_destructor = tpacket_destruct_skb;
 		__packet_set_status(po, ph, TP_STATUS_SENDING);
 		atomic_inc(&po->tx_ring.pending);
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-09-27  6:56 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2010-09-23 10:15 [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out Changli Gao
2010-09-23 12:29 ` Eric Dumazet
2010-09-23 14:17   ` Changli Gao
2010-09-23 14:41     ` Eric Dumazet
2010-09-24  6:36 ` Jarek Poplawski
2010-09-24  7:01   ` Eric Dumazet
2010-09-27  1:25     ` David Miller
2010-09-27  5:40       ` Eric Dumazet
2010-09-27  1:22   ` David Miller
2010-09-27  5:30     ` Jarek Poplawski
2010-09-27  6:56       ` David Miller
2010-09-27  1:24   ` Changli Gao
2010-09-27  5:46     ` Jarek Poplawski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).