From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org,
Salam Noureddine <noureddine@aristanetworks.com>,
Daniel Borkmann <dborkman@redhat.com>,
Ben Greear <greearb@candelatech.com>,
Eric Dumazet <eric.dumazet@gmail.com>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.4 19/32] packet: fix use after free race in send path when dev is released
Date: Fri, 6 Dec 2013 13:52:29 -0800 [thread overview]
Message-ID: <20131206214958.593556171@linuxfoundation.org> (raw)
In-Reply-To: <20131206214956.830407026@linuxfoundation.org>
3.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Daniel Borkmann <dborkman@redhat.com>
[ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ]
Salam reported a use-after-free bug in PF_PACKET that occurs when
we're sending out frames on a socket bound to a device and the
net device is suddenly unregistered. It appears that commit 827d9780
introduced a possible race condition between {t,}packet_snd() and
packet_notifier(). In the case of a bound socket, packet_notifier()
can drop the last reference to the net_device and {t,}packet_snd()
might end up suddenly sending a packet over a freed net_device.
To avoid reverting 827d9780 and thus introducing a performance
regression compared to the current state of things, we decided to
hold a cached RCU protected pointer to the net device and maintain
it on write side via bind spin_lock protected register_prot_hook()
and __unregister_prot_hook() calls.
In the {t,}packet_snd() path, we access this pointer under rcu_read_lock
through packet_cached_dev_get(), which takes a reference on the device
to prevent it from being freed through packet_notifier() while
we're in the send path. This is okay to do as dev_put()/dev_hold() only
update per-cpu counters, so this should not be a performance issue. Also,
the code simplifies a bit as we don't need need_rls_dev anymore.
Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.")
Reported-by: Salam Noureddine <noureddine@aristanetworks.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Salam Noureddine <noureddine@aristanetworks.com>
Cc: Ben Greear <greearb@candelatech.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/packet/af_packet.c | 60 ++++++++++++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 23 deletions(-)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -294,6 +294,7 @@ struct packet_sock {
unsigned int tp_reserve;
unsigned int tp_loss:1;
unsigned int tp_tstamp;
+ struct net_device __rcu *cached_dev;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
@@ -349,11 +350,15 @@ static void __fanout_link(struct sock *s
static void register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
+
if (!po->running) {
- if (po->fanout)
+ if (po->fanout) {
__fanout_link(sk, po);
- else
+ } else {
dev_add_pack(&po->prot_hook);
+ rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+ }
+
sock_hold(sk);
po->running = 1;
}
@@ -371,10 +376,13 @@ static void __unregister_prot_hook(struc
struct packet_sock *po = pkt_sk(sk);
po->running = 0;
- if (po->fanout)
+ if (po->fanout) {
__fanout_unlink(sk, po);
- else
+ } else {
__dev_remove_pack(&po->prot_hook);
+ RCU_INIT_POINTER(po->cached_dev, NULL);
+ }
+
__sock_put(sk);
if (sync) {
@@ -2044,12 +2052,24 @@ static int tpacket_fill_skb(struct packe
return tp_len;
}
+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(po->cached_dev);
+ if (dev)
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ return dev;
+}
+
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
int err, reserve = 0;
void *ph;
struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
@@ -2063,7 +2083,7 @@ static int tpacket_snd(struct packet_soc
err = -EBUSY;
if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
@@ -2077,19 +2097,17 @@ static int tpacket_snd(struct packet_soc
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
-
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
+ reserve = dev->hard_header_len;
+
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
@@ -2166,8 +2184,7 @@ out_status:
__packet_set_status(po, ph, status);
kfree_skb(skb);
out_put:
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
out:
mutex_unlock(&po->pg_vec_lock);
return err;
@@ -2205,7 +2222,6 @@ static int packet_snd(struct socket *soc
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
unsigned char *addr;
int err, reserve = 0;
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2221,7 +2237,7 @@ static int packet_snd(struct socket *soc
*/
if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
@@ -2233,19 +2249,17 @@ static int packet_snd(struct socket *soc
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
- if (dev == NULL)
+ if (unlikely(dev == NULL))
goto out_unlock;
- if (sock->type == SOCK_RAW)
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
+ if (unlikely(!(dev->flags & IFF_UP)))
goto out_unlock;
+ if (sock->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
vnet_hdr_len = sizeof(vnet_hdr);
@@ -2378,15 +2392,14 @@ static int packet_snd(struct socket *soc
if (err > 0 && (err = net_xmit_errno(err)) != 0)
goto out_unlock;
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
return len;
out_free:
kfree_skb(skb);
out_unlock:
- if (dev && need_rls_dev)
+ if (dev)
dev_put(dev);
out:
return err;
@@ -2603,6 +2616,7 @@ static int packet_create(struct net *net
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
+ RCU_INIT_POINTER(po->cached_dev, NULL);
sk->sk_destruct = packet_sock_destruct;
sk_refcnt_debug_inc(sk);
next prev parent reply other threads:[~2013-12-06 21:52 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-06 21:52 [PATCH 3.4 00/32] 3.4.73-stable review Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 01/32] net: Fix "ip rule delete table 256" Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 02/32] ipv6: use rt6_get_dflt_router to get default router in rt6_route_rcv Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 03/32] random32: fix off-by-one in seeding requirement Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 04/32] bonding: dont permit to use ARP monitoring in 802.3ad mode Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 05/32] 6lowpan: Uncompression of traffic class field was incorrect Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 06/32] bonding: fix two race conditions in bond_store_updelay/downdelay Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 07/32] isdnloop: use strlcpy() instead of strcpy() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 08/32] connector: improved unaligned access error fix Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 09/32] ipv4: fix possible seqlock deadlock Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 10/32] inet: prevent leakage of uninitialized memory to user in recv syscalls Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 11/32] net: rework recvmsg handler msg_name and msg_namelen logic Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 12/32] net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 13/32] inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 14/32] net: clamp ->msg_namelen instead of returning an error Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 15/32] ipv6: fix leaking uninitialized port number of offender sockaddr Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 16/32] atm: idt77252: fix dev refcnt leak Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 17/32] net: core: Always propagate flag changes to interfaces Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 18/32] bridge: flush brs address entry in fdb when remove the bridge dev Greg Kroah-Hartman
2013-12-06 21:52 ` Greg Kroah-Hartman [this message]
2013-12-06 21:52 ` [PATCH 3.4 20/32] af_packet: block BH in prb_shutdown_retire_blk_timer() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 21/32] net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 22/32] inet: fix possible seqlock deadlocks Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 23/32] ipv6: fix possible seqlock deadlock in ip6_finish_output2 Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 24/32] {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 25/32] HID: picolcd_core: validate output report details Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 26/32] mmc: block: fix a bug of error handling in MMC driver Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 27/32] nfsd: use "init_net" for portmapper Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 28/32] video: kyro: fix incorrect sizes when copying to userspace Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 29/32] iommu/vt-d: Fixed interaction of VFIO_IOMMU_MAP_DMA with IOMMU address limits Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 30/32] elevator: acquire q->sysfs_lock in elevator_change() Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 31/32] dm: fix truncated status strings Greg Kroah-Hartman
2013-12-06 21:52 ` [PATCH 3.4 32/32] blk-core: Fix memory corruption if blkcg_init_queue fails Greg Kroah-Hartman
2013-12-07 6:45 ` [PATCH 3.4 00/32] 3.4.73-stable review Guenter Roeck
2013-12-07 17:02 ` Greg Kroah-Hartman
2013-12-07 22:16 ` Shuah Khan
2013-12-08 14:38 ` Satoru Takeuchi
2013-12-08 15:23 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20131206214958.593556171@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=dborkman@redhat.com \
--cc=eric.dumazet@gmail.com \
--cc=greearb@candelatech.com \
--cc=linux-kernel@vger.kernel.org \
--cc=noureddine@aristanetworks.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).