stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Salam Noureddine <noureddine@aristanetworks.com>,
	Daniel Borkmann <dborkman@redhat.com>,
	Ben Greear <greearb@candelatech.com>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.4 19/32] packet: fix use after free race in send path when dev is released
Date: Fri,  6 Dec 2013 13:52:29 -0800	[thread overview]
Message-ID: <20131206214958.593556171@linuxfoundation.org> (raw)
In-Reply-To: <20131206214956.830407026@linuxfoundation.org>

3.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Daniel Borkmann <dborkman@redhat.com>

[ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ]

Salam reported a use after free bug in PF_PACKET that occurs when
we're sending out frames on a socket bound device and suddenly the
net device is being unregistered. It appears that commit 827d9780
introduced a possible race condition between {t,}packet_snd() and
packet_notifier(). In the case of a bound socket, packet_notifier()
can drop the last reference to the net_device and {t,}packet_snd()
might end up suddenly sending a packet over a freed net_device.

To avoid reverting 827d9780 and thus introducing a performance
regression compared to the current state of things, we decided to
hold a cached RCU protected pointer to the net device and maintain
it on write side via bind spin_lock protected register_prot_hook()
and __unregister_prot_hook() calls.

In {t,}packet_snd() path, we access this pointer under rcu_read_lock
through packet_cached_dev_get() that holds reference to the device
to prevent it from being freed through packet_notifier() while
we're in send path. This is okay to do as dev_put()/dev_hold() are
per-cpu counters, so this should not be a performance issue. Also,
the code simplifies a bit as we don't need need_rls_dev anymore.

Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.")
Reported-by: Salam Noureddine <noureddine@aristanetworks.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Salam Noureddine <noureddine@aristanetworks.com>
Cc: Ben Greear <greearb@candelatech.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c |   60 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 23 deletions(-)

--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -294,6 +294,7 @@ struct packet_sock {
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
 	unsigned int		tp_tstamp;
+	struct net_device __rcu	*cached_dev;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
@@ -349,11 +350,15 @@ static void __fanout_link(struct sock *s
 static void register_prot_hook(struct sock *sk)
 {
 	struct packet_sock *po = pkt_sk(sk);
+
 	if (!po->running) {
-		if (po->fanout)
+		if (po->fanout) {
 			__fanout_link(sk, po);
-		else
+		} else {
 			dev_add_pack(&po->prot_hook);
+			rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+		}
+
 		sock_hold(sk);
 		po->running = 1;
 	}
@@ -371,10 +376,13 @@ static void __unregister_prot_hook(struc
 	struct packet_sock *po = pkt_sk(sk);
 
 	po->running = 0;
-	if (po->fanout)
+	if (po->fanout) {
 		__fanout_unlink(sk, po);
-	else
+	} else {
 		__dev_remove_pack(&po->prot_hook);
+		RCU_INIT_POINTER(po->cached_dev, NULL);
+	}
+
 	__sock_put(sk);
 
 	if (sync) {
@@ -2044,12 +2052,24 @@ static int tpacket_fill_skb(struct packe
 	return tp_len;
 }
 
+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(po->cached_dev);
+	if (dev)
+		dev_hold(dev);
+	rcu_read_unlock();
+
+	return dev;
+}
+
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	int err, reserve = 0;
 	void *ph;
 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
@@ -2063,7 +2083,7 @@ static int tpacket_snd(struct packet_soc
 
 	err = -EBUSY;
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2077,19 +2097,17 @@ static int tpacket_snd(struct packet_soc
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
-
-	reserve = dev->hard_header_len;
-
 	err = -ENETDOWN;
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
+	reserve = dev->hard_header_len;
+
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
@@ -2166,8 +2184,7 @@ out_status:
 	__packet_set_status(po, ph, status);
 	kfree_skb(skb);
 out_put:
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 out:
 	mutex_unlock(&po->pg_vec_lock);
 	return err;
@@ -2205,7 +2222,6 @@ static int packet_snd(struct socket *soc
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	unsigned char *addr;
 	int err, reserve = 0;
 	struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2221,7 +2237,7 @@ static int packet_snd(struct socket *soc
 	 */
 
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2233,19 +2249,17 @@ static int packet_snd(struct socket *soc
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
 	err = -ENXIO;
-	if (dev == NULL)
+	if (unlikely(dev == NULL))
 		goto out_unlock;
-	if (sock->type == SOCK_RAW)
-		reserve = dev->hard_header_len;
-
 	err = -ENETDOWN;
-	if (!(dev->flags & IFF_UP))
+	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;
 
+	if (sock->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	if (po->has_vnet_hdr) {
 		vnet_hdr_len = sizeof(vnet_hdr);
 
@@ -2378,15 +2392,14 @@ static int packet_snd(struct socket *soc
 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
 		goto out_unlock;
 
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 
 	return len;
 
 out_free:
 	kfree_skb(skb);
 out_unlock:
-	if (dev && need_rls_dev)
+	if (dev)
 		dev_put(dev);
 out:
 	return err;
@@ -2603,6 +2616,7 @@ static int packet_create(struct net *net
 	po = pkt_sk(sk);
 	sk->sk_family = PF_PACKET;
 	po->num = proto;
+	RCU_INIT_POINTER(po->cached_dev, NULL);
 
 	sk->sk_destruct = packet_sock_destruct;
 	sk_refcnt_debug_inc(sk);



  parent reply	other threads:[~2013-12-06 21:52 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-06 21:52 [PATCH 3.4 00/32] 3.4.73-stable review Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 01/32] net: Fix "ip rule delete table 256" Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 02/32] ipv6: use rt6_get_dflt_router to get default router in rt6_route_rcv Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 03/32] random32: fix off-by-one in seeding requirement Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 04/32] bonding: dont permit to use ARP monitoring in 802.3ad mode Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 05/32] 6lowpan: Uncompression of traffic class field was incorrect Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 06/32] bonding: fix two race conditions in bond_store_updelay/downdelay Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 07/32] isdnloop: use strlcpy() instead of strcpy() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 08/32] connector: improved unaligned access error fix Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 09/32] ipv4: fix possible seqlock deadlock Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 10/32] inet: prevent leakage of uninitialized memory to user in recv syscalls Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 11/32] net: rework recvmsg handler msg_name and msg_namelen logic Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 12/32] net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 13/32] inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 14/32] net: clamp ->msg_namelen instead of returning an error Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 15/32] ipv6: fix leaking uninitialized port number of offender sockaddr Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 16/32] atm: idt77252: fix dev refcnt leak Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 17/32] net: core: Always propagate flag changes to interfaces Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 18/32] bridge: flush brs address entry in fdb when remove the bridge dev Greg Kroah-Hartman
2013-12-06 21:52 ` Greg Kroah-Hartman [this message]
2013-12-06 21:52 ` [PATCH 3.4 20/32] af_packet: block BH in prb_shutdown_retire_blk_timer() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 21/32] net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 22/32] inet: fix possible seqlock deadlocks Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 23/32] ipv6: fix possible seqlock deadlock in ip6_finish_output2 Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 24/32] {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 25/32] HID: picolcd_core: validate output report details Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 26/32] mmc: block: fix a bug of error handling in MMC driver Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 27/32] nfsd: use "init_net" for portmapper Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 28/32] video: kyro: fix incorrect sizes when copying to userspace Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 29/32] iommu/vt-d: Fixed interaction of VFIO_IOMMU_MAP_DMA with IOMMU address limits Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 30/32] elevator: acquire q->sysfs_lock in elevator_change() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 31/32] dm: fix truncated status strings Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 32/32] blk-core: Fix memory corruption if blkcg_init_queue fails Greg Kroah-Hartman
2013-12-07  6:45 ` [PATCH 3.4 00/32] 3.4.73-stable review Guenter Roeck
2013-12-07 17:02   ` Greg Kroah-Hartman
2013-12-07 22:16 ` Shuah Khan
2013-12-08 14:38 ` Satoru Takeuchi
2013-12-08 15:23   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131206214958.593556171@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=dborkman@redhat.com \
    --cc=eric.dumazet@gmail.com \
    --cc=greearb@candelatech.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=noureddine@aristanetworks.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).