All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Salam Noureddine <noureddine@aristanetworks.com>,
	Daniel Borkmann <dborkman@redhat.com>,
	Ben Greear <greearb@candelatech.com>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.4 19/32] packet: fix use after free race in send path when dev is released
Date: Fri,  6 Dec 2013 13:52:29 -0800	[thread overview]
Message-ID: <20131206214958.593556171@linuxfoundation.org> (raw)
In-Reply-To: <20131206214956.830407026@linuxfoundation.org>

3.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Daniel Borkmann <dborkman@redhat.com>

[ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ]

Salam reported a use after free bug in PF_PACKET that occurs when
we're sending out frames on a socket bound device and suddenly the
net device is being unregistered. It appears that commit 827d9780
introduced a possible race condition between {t,}packet_snd() and
packet_notifier(). In the case of a bound socket, packet_notifier()
can drop the last reference to the net_device and {t,}packet_snd()
might end up suddenly sending a packet over a freed net_device.

To avoid reverting 827d9780 and thus introducing a performance
regression compared to the current state of things, we decided to
hold a cached RCU protected pointer to the net device and maintain
it on write side via bind spin_lock protected register_prot_hook()
and __unregister_prot_hook() calls.

In {t,}packet_snd() path, we access this pointer under rcu_read_lock
through packet_cached_dev_get() that holds reference to the device
to prevent it from being freed through packet_notifier() while
we're in send path. This is okay to do as dev_put()/dev_hold() are
per-cpu counters, so this should not be a performance issue. Also,
the code simplifies a bit as we don't need need_rls_dev anymore.

Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.")
Reported-by: Salam Noureddine <noureddine@aristanetworks.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Salam Noureddine <noureddine@aristanetworks.com>
Cc: Ben Greear <greearb@candelatech.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c |   60 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 23 deletions(-)

--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -294,6 +294,7 @@ struct packet_sock {
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
 	unsigned int		tp_tstamp;
+	struct net_device __rcu	*cached_dev;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
@@ -349,11 +350,15 @@ static void __fanout_link(struct sock *s
 static void register_prot_hook(struct sock *sk)
 {
 	struct packet_sock *po = pkt_sk(sk);
+
 	if (!po->running) {
-		if (po->fanout)
+		if (po->fanout) {
 			__fanout_link(sk, po);
-		else
+		} else {
 			dev_add_pack(&po->prot_hook);
+			rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+		}
+
 		sock_hold(sk);
 		po->running = 1;
 	}
@@ -371,10 +376,13 @@ static void __unregister_prot_hook(struc
 	struct packet_sock *po = pkt_sk(sk);
 
 	po->running = 0;
-	if (po->fanout)
+	if (po->fanout) {
 		__fanout_unlink(sk, po);
-	else
+	} else {
 		__dev_remove_pack(&po->prot_hook);
+		RCU_INIT_POINTER(po->cached_dev, NULL);
+	}
+
 	__sock_put(sk);
 
 	if (sync) {
@@ -2044,12 +2052,24 @@ static int tpacket_fill_skb(struct packe
 	return tp_len;
 }
 
+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(po->cached_dev);
+	if (dev)
+		dev_hold(dev);
+	rcu_read_unlock();
+
+	return dev;
+}
+
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	int err, reserve = 0;
 	void *ph;
 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
@@ -2063,7 +2083,7 @@ static int tpacket_snd(struct packet_soc
 
 	err = -EBUSY;
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2077,19 +2097,17 @@ static int tpacket_snd(struct packet_soc
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
-
-	reserve = dev->hard_header_len;
-
 	err = -ENETDOWN;
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
+	reserve = dev->hard_header_len;
+
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
@@ -2166,8 +2184,7 @@ out_status:
 	__packet_set_status(po, ph, status);
 	kfree_skb(skb);
 out_put:
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 out:
 	mutex_unlock(&po->pg_vec_lock);
 	return err;
@@ -2205,7 +2222,6 @@ static int packet_snd(struct socket *soc
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	unsigned char *addr;
 	int err, reserve = 0;
 	struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2221,7 +2237,7 @@ static int packet_snd(struct socket *soc
 	 */
 
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2233,19 +2249,17 @@ static int packet_snd(struct socket *soc
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
 	err = -ENXIO;
-	if (dev == NULL)
+	if (unlikely(dev == NULL))
 		goto out_unlock;
-	if (sock->type == SOCK_RAW)
-		reserve = dev->hard_header_len;
-
 	err = -ENETDOWN;
-	if (!(dev->flags & IFF_UP))
+	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;
 
+	if (sock->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	if (po->has_vnet_hdr) {
 		vnet_hdr_len = sizeof(vnet_hdr);
 
@@ -2378,15 +2392,14 @@ static int packet_snd(struct socket *soc
 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
 		goto out_unlock;
 
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 
 	return len;
 
 out_free:
 	kfree_skb(skb);
 out_unlock:
-	if (dev && need_rls_dev)
+	if (dev)
 		dev_put(dev);
 out:
 	return err;
@@ -2603,6 +2616,7 @@ static int packet_create(struct net *net
 	po = pkt_sk(sk);
 	sk->sk_family = PF_PACKET;
 	po->num = proto;
+	RCU_INIT_POINTER(po->cached_dev, NULL);
 
 	sk->sk_destruct = packet_sock_destruct;
 	sk_refcnt_debug_inc(sk);



  parent reply	other threads:[~2013-12-06 21:54 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-06 21:52 [PATCH 3.4 00/32] 3.4.73-stable review Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 01/32] net: Fix "ip rule delete table 256" Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 02/32] ipv6: use rt6_get_dflt_router to get default router in rt6_route_rcv Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 03/32] random32: fix off-by-one in seeding requirement Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 04/32] bonding: dont permit to use ARP monitoring in 802.3ad mode Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 05/32] 6lowpan: Uncompression of traffic class field was incorrect Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 06/32] bonding: fix two race conditions in bond_store_updelay/downdelay Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 07/32] isdnloop: use strlcpy() instead of strcpy() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 08/32] connector: improved unaligned access error fix Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 09/32] ipv4: fix possible seqlock deadlock Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 10/32] inet: prevent leakage of uninitialized memory to user in recv syscalls Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 11/32] net: rework recvmsg handler msg_name and msg_namelen logic Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 12/32] net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 13/32] inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 14/32] net: clamp ->msg_namelen instead of returning an error Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 15/32] ipv6: fix leaking uninitialized port number of offender sockaddr Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 16/32] atm: idt77252: fix dev refcnt leak Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 17/32] net: core: Always propagate flag changes to interfaces Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 18/32] bridge: flush brs address entry in fdb when remove the bridge dev Greg Kroah-Hartman
2013-12-06 21:52 ` Greg Kroah-Hartman [this message]
2013-12-06 21:52 ` [PATCH 3.4 20/32] af_packet: block BH in prb_shutdown_retire_blk_timer() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 21/32] net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 22/32] inet: fix possible seqlock deadlocks Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 23/32] ipv6: fix possible seqlock deadlock in ip6_finish_output2 Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 24/32] {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 25/32] HID: picolcd_core: validate output report details Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 26/32] mmc: block: fix a bug of error handling in MMC driver Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 27/32] nfsd: use "init_net" for portmapper Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 28/32] video: kyro: fix incorrect sizes when copying to userspace Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 29/32] iommu/vt-d: Fixed interaction of VFIO_IOMMU_MAP_DMA with IOMMU address limits Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 30/32] elevator: acquire q->sysfs_lock in elevator_change() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 31/32] dm: fix truncated status strings Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 32/32] blk-core: Fix memory corruption if blkcg_init_queue fails Greg Kroah-Hartman
2013-12-07  6:45 ` [PATCH 3.4 00/32] 3.4.73-stable review Guenter Roeck
2013-12-07 17:02   ` Greg Kroah-Hartman
2013-12-07 22:16 ` Shuah Khan
2013-12-08 14:38 ` Satoru Takeuchi
2013-12-08 15:23   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131206214958.593556171@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=dborkman@redhat.com \
    --cc=eric.dumazet@gmail.com \
    --cc=greearb@candelatech.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=noureddine@aristanetworks.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.