All of lore.kernel.org
 help / color / mirror / Atom feed
From: Changli Gao <xiaosuo@gmail.com>
To: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>,
	Oliver Hartkopp <socketcan@hartkopp.net>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	netdev@vger.kernel.org, Changli Gao <xiaosuo@gmail.com>
Subject: [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out
Date: Thu, 23 Sep 2010 18:15:39 +0800	[thread overview]
Message-ID: <1285236939-3239-1-git-send-email-xiaosuo@gmail.com> (raw)

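Since skb->destructor() is used to account socket memory, it may be called
before the skb is actually sent out. For af_packet's TX ring that destructor
also hands the frame back to user space, so the frame can be overwritten
while still queued and a corrupted skb may eventually be sent out.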

A new destructor is added to struct skb_shared_info; it is not called
until the last reference to the skb's data is dropped. af_packet uses
this destructor instead.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
v3: rename destructor to data_destructor, destructor_arg to data_destructor_arg,
    fix splicing of skbs generated by an AF_PACKET socket into a pipe.
v2: avoid kmalloc/kfree
 include/linux/skbuff.h |    7 ++++---
 net/core/skbuff.c      |   29 ++++++++++++++++++++---------
 net/packet/af_packet.c |   25 ++++++++++++-------------
 3 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9e8085a..0854135 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -191,15 +191,16 @@ struct skb_shared_info {
 	__u8		tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	void		(*data_destructor)(struct sk_buff *skb);
 
 	/*
 	 * Warning : all fields before dataref are cleared in __alloc_skb()
 	 */
 	atomic_t	dataref;
 
-	/* Intermediate layers must ensure that destructor_arg
-	 * remains valid until skb destructor */
-	void *		destructor_arg;
+	/* Intermediate layers must ensure that data_destructor_arg
+	 * remains valid until skb data destructor */
+	void		*data_destructor_arg[2];
 	/* must be last field, see pskb_expand_head() */
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 752c197..95a48fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -332,10 +332,14 @@ static void skb_release_data(struct sk_buff *skb)
 	if (!skb->cloned ||
 	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
 			       &skb_shinfo(skb)->dataref)) {
-		if (skb_shinfo(skb)->nr_frags) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+		if (shinfo->data_destructor)
+			shinfo->data_destructor(skb);
+		if (shinfo->nr_frags) {
 			int i;
-			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-				put_page(skb_shinfo(skb)->frags[i].page);
+			for (i = 0; i < shinfo->nr_frags; i++)
+				put_page(shinfo->frags[i].page);
 		}
 
 		if (skb_has_frag_list(skb))
@@ -497,9 +501,12 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 	if (skb_shared(skb) || skb_cloned(skb))
 		return false;
 
+	shinfo = skb_shinfo(skb);
+	if (shinfo->data_destructor)
+		return false;
+
 	skb_release_head_state(skb);
 
-	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
 
@@ -799,7 +806,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb),
-	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+	       offsetof(struct skb_shared_info,
+			frags[skb_shinfo(skb)->nr_frags]));
+	skb_shinfo(skb)->data_destructor = NULL;
 
 	/* Check if we can avoid taking references on fragments if we own
 	 * the last reference on skb->head. (see skb_release_data())
@@ -1408,7 +1417,7 @@ new_page:
 static inline int spd_fill_page(struct splice_pipe_desc *spd,
 				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
-				struct sk_buff *skb, int linear,
+				struct sk_buff *skb, bool linear,
 				struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == pipe->buffers))
@@ -1446,7 +1455,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
-				   struct splice_pipe_desc *spd, int linear,
+				   struct splice_pipe_desc *spd, bool linear,
 				   struct sock *sk,
 				   struct pipe_inode_info *pipe)
 {
@@ -1498,7 +1507,7 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk, pipe))
+			     offset, len, skb, spd, true, sk, pipe))
 		return 1;
 
 	/*
@@ -1508,7 +1517,9 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk, pipe))
+				     offset, len, skb, spd,
+				     skb_shinfo(skb)->data_destructor != NULL,
+				     sk, pipe))
 			return 1;
 	}
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27..ecf57c7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -825,19 +825,19 @@ ring_is_full:
 
 static void tpacket_destruct_skb(struct sk_buff *skb)
 {
-	struct packet_sock *po = pkt_sk(skb->sk);
-	void *ph;
-
-	BUG_ON(skb == NULL);
+	struct packet_sock *po;
 
+	po = pkt_sk(skb_shinfo(skb)->data_destructor_arg[0]);
 	if (likely(po->tx_ring.pg_vec)) {
-		ph = skb_shinfo(skb)->destructor_arg;
+		void *ph = skb_shinfo(skb)->data_destructor_arg[1];
+
 		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
 	}
 
+	skb->sk = &po->sk;
 	sock_wfree(skb);
 }
 
@@ -862,7 +862,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->dev = dev;
 	skb->priority = po->sk.sk_priority;
 	skb->mark = po->sk.sk_mark;
-	skb_shinfo(skb)->destructor_arg = ph.raw;
 
 	switch (po->tp_version) {
 	case TPACKET_V2:
@@ -884,9 +883,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	to_write = tp_len;
 
 	if (sock->type == SOCK_DGRAM) {
-		err = dev_hard_header(skb, dev, ntohs(proto), addr,
-				NULL, tp_len);
-		if (unlikely(err < 0))
+		if (unlikely(dev_hard_header(skb, dev, ntohs(proto), addr,
+					     NULL, tp_len) < 0))
 			return -EINVAL;
 	} else if (dev->hard_header_len) {
 		/* net device doesn't like empty head */
@@ -897,8 +895,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		}
 
 		skb_push(skb, dev->hard_header_len);
-		err = skb_store_bits(skb, 0, data,
-				dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data, dev->hard_header_len);
 		if (unlikely(err))
 			return err;
 
@@ -906,7 +903,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		to_write -= dev->hard_header_len;
 	}
 
-	err = -EFAULT;
 	page = virt_to_page(data);
 	offset = offset_in_page(data);
 	len_max = PAGE_SIZE - offset;
@@ -1028,7 +1024,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		skb->destructor = tpacket_destruct_skb;
+		skb_shinfo(skb)->data_destructor_arg[0] = &po->sk;
+		skb_shinfo(skb)->data_destructor_arg[1] = ph;
+		skb->destructor = NULL;
+		skb_shinfo(skb)->data_destructor = tpacket_destruct_skb;
 		__packet_set_status(po, ph, TP_STATUS_SENDING);
 		atomic_inc(&po->tx_ring.pending);
 

             reply	other threads:[~2010-09-23 10:16 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-23 10:15 Changli Gao [this message]
2010-09-23 12:29 ` [PATCH v3] net: af_packet: don't call tpacket_destruct_skb() until the skb is sent out Eric Dumazet
2010-09-23 14:17   ` Changli Gao
2010-09-23 14:41     ` Eric Dumazet
2010-09-24  6:36 ` Jarek Poplawski
2010-09-24  7:01   ` Eric Dumazet
2010-09-27  1:25     ` David Miller
2010-09-27  5:40       ` Eric Dumazet
2010-09-27  1:22   ` David Miller
2010-09-27  5:30     ` Jarek Poplawski
2010-09-27  6:56       ` David Miller
2010-09-27  1:24   ` Changli Gao
2010-09-27  5:46     ` Jarek Poplawski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1285236939-3239-1-git-send-email-xiaosuo@gmail.com \
    --to=xiaosuo@gmail.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=socketcan@hartkopp.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.