From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Francesco Ruggeri <fruggeri@arista.com>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.14 09/37] packet: race condition in packet_bind
Date: Mon, 7 Dec 2015 09:26:21 -0500 [thread overview]
Message-ID: <20151207141743.729784230@linuxfoundation.org> (raw)
In-Reply-To: <20151207141743.221453847@linuxfoundation.org>
3.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Francesco Ruggeri <fruggeri@aristanetworks.com>
[ Upstream commit 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 ]
There is a race condition between packet_notifier and packet_bind{_spkt}.
It happens if packet_notifier(NETDEV_UNREGISTER) executes between the
time packet_bind{_spkt} takes a reference on the new netdevice and the
time packet_do_bind sets po->ifindex.
In this case the notification can be missed.
If this happens during a dev_change_net_namespace this can result in the
netdevice being moved to the new namespace while the packet_sock in the
old namespace still holds a reference on it. When the netdevice is later
deleted in the new namespace the deletion hangs since the packet_sock
is not found in the new namespace's &net->packet.sklist.
It can be reproduced with the script below.
This patch makes packet_do_bind check again for the presence of the
netdevice in the packet_sock's namespace after the synchronize_net
in unregister_prot_hook.
More generally, it also uses the rcu lock for the duration of the bind
to stop dev_change_net_namespace/rollback_registered_many from
going past the synchronize_net following unlist_netdevice, so that
no NETDEV_UNREGISTER notifications can happen on the new netdevice
while the bind is executing. In order to do this some code from
packet_bind{_spkt} is consolidated into packet_do_bind.
# Reproducer for the packet_bind / packet_notifier race.
#
# The parent opens a packet socket and creates a VLAN device; a forked child
# moves that device into the 'dummy' network namespace and deletes it there
# while the parent is binding the socket to the device.
#
# Usage: <script> [vlanId]   (default VLAN id is 400; parent NIC is realDev)
import socket, os, time, sys

proto = 7
realDev = 'em1'
vlanId = 400
if len(sys.argv) > 1:
    vlanId = int(sys.argv[1])
dev = 'vlan%d' % vlanId

# Set the CPU affinity mask of this (parent) process.
os.system('taskset -p 0x10 %d' % os.getpid())

s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto)
os.system('ip link add link %s name %s type vlan id %d' %
          (realDev, dev, vlanId))
os.system('ip netns add dummy')

pid = os.fork()

if pid == 0:
    # Child: dev should be moved while packet_do_bind is in synchronize net
    os.system('taskset -p 0x20000 %d' % os.getpid())
    os.system('ip link set %s netns dummy' % dev)
    os.system('ip netns exec dummy ip link del %s' % dev)
    s.close()
    sys.exit(0)

# Parent: short delay before binding -- presumably tuned so the bind overlaps
# the child's namespace move; adjust if the race does not trigger.
time.sleep(.004)
try:
    # Binding with a protocol different from the one the socket was created
    # with (proto+1 vs proto).
    s.bind(('%s' % dev, proto+1))
except socket.error:
    # The device was already gone: nothing to reproduce, just clean up.
    print('Could not bind socket')
    s.close()
    os.system('ip netns del dummy')
    sys.exit(0)

# Wait for the child to finish moving/deleting the device, then clean up.
os.waitpid(pid, 0)
s.close()
os.system('ip netns del dummy')
sys.exit(0)
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/packet/af_packet.c | 80 ++++++++++++++++++++++++++++++-------------------
1 file changed, 49 insertions(+), 31 deletions(-)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2642,22 +2642,40 @@ static int packet_release(struct socket
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
+static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
+ __be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
struct net_device *dev_curr;
__be16 proto_curr;
bool need_rehook;
+ struct net_device *dev = NULL;
+ int ret = 0;
+ bool unlisted = false;
- if (po->fanout) {
- if (dev)
- dev_put(dev);
-
+ if (po->fanout)
return -EINVAL;
- }
lock_sock(sk);
spin_lock(&po->bind_lock);
+ rcu_read_lock();
+
+ if (name) {
+ dev = dev_get_by_name_rcu(sock_net(sk), name);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ } else if (ifindex) {
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ if (dev)
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
@@ -2665,14 +2683,29 @@ static int packet_do_bind(struct sock *s
need_rehook = proto_curr != proto || dev_curr != dev;
if (need_rehook) {
- unregister_prot_hook(sk, true);
+ if (po->running) {
+ rcu_read_unlock();
+ __unregister_prot_hook(sk, true);
+ rcu_read_lock();
+ dev_curr = po->prot_hook.dev;
+ if (dev)
+ unlisted = !dev_get_by_index_rcu(sock_net(sk),
+ dev->ifindex);
+ }
po->num = proto;
po->prot_hook.type = proto;
- po->prot_hook.dev = dev;
- po->ifindex = dev ? dev->ifindex : 0;
- packet_cached_dev_assign(po, dev);
+ if (unlikely(unlisted)) {
+ dev_put(dev);
+ po->prot_hook.dev = NULL;
+ po->ifindex = -1;
+ packet_cached_dev_reset(po);
+ } else {
+ po->prot_hook.dev = dev;
+ po->ifindex = dev ? dev->ifindex : 0;
+ packet_cached_dev_assign(po, dev);
+ }
}
if (dev_curr)
dev_put(dev_curr);
@@ -2680,7 +2713,7 @@ static int packet_do_bind(struct sock *s
if (proto == 0 || !need_rehook)
goto out_unlock;
- if (!dev || (dev->flags & IFF_UP)) {
+ if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
register_prot_hook(sk);
} else {
sk->sk_err = ENETDOWN;
@@ -2689,9 +2722,10 @@ static int packet_do_bind(struct sock *s
}
out_unlock:
+ rcu_read_unlock();
spin_unlock(&po->bind_lock);
release_sock(sk);
- return 0;
+ return ret;
}
/*
@@ -2703,8 +2737,6 @@ static int packet_bind_spkt(struct socke
{
struct sock *sk = sock->sk;
char name[15];
- struct net_device *dev;
- int err = -ENODEV;
/*
* Check legality
@@ -2714,19 +2746,13 @@ static int packet_bind_spkt(struct socke
return -EINVAL;
strlcpy(name, uaddr->sa_data, sizeof(name));
- dev = dev_get_by_name(sock_net(sk), name);
- if (dev)
- err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
- return err;
+ return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
struct sock *sk = sock->sk;
- struct net_device *dev = NULL;
- int err;
-
/*
* Check legality
@@ -2737,16 +2763,8 @@ static int packet_bind(struct socket *so
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- if (sll->sll_ifindex) {
- err = -ENODEV;
- dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
- if (dev == NULL)
- goto out;
- }
- err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
-
-out:
- return err;
+ return packet_do_bind(sk, NULL, sll->sll_ifindex,
+ sll->sll_protocol ? : pkt_sk(sk)->num);
}
static struct proto packet_proto = {
next prev parent reply other threads:[~2015-12-07 14:27 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-07 14:26 [PATCH 3.14 00/37] 3.14.58-stable review Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 02/37] macvtap: unbreak receiving of gro skb with frag list Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 03/37] ppp: fix pppoe_dev deletion condition in pppoe_release() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 04/37] RDS-TCP: Recover correctly from pskb_pull()/pksb_trim() failure in rds_tcp_data_recv Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 05/37] net/mlx4: Copy/set only sizeof struct mlx4_eqe bytes Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 06/37] stmmac: Correctly report PTP capabilities Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 07/37] ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 08/37] net: avoid NULL deref in inet_ctl_sock_destroy() Greg Kroah-Hartman
2015-12-07 14:26 ` Greg Kroah-Hartman [this message]
2015-12-07 14:26 ` [PATCH 3.14 10/37] net: fix a race in dst_release() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 11/37] virtio-net: drop NETIF_F_FRAGLIST Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 12/37] RDS: verify the underlying transport exists before creating a connection Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 13/37] ARM: 8426/1: dma-mapping: add missing range check in dma_mmap() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 14/37] ARM: 8427/1: dma-mapping: add support for offset parameter " Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 15/37] ARM: orion: Fix DSA platform device after mvmdio conversion Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 16/37] ARM: common: edma: Fix channel parameter for irq callbacks Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 17/37] x86/setup: Extend low identity map to cover whole kernel range Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 18/37] x86/setup: Fix low identity map for >= 2GB " Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 19/37] x86/cpu: Call verify_cpu() after having entered long mode too Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 20/37] x86/cpu: Fix SMAP check in PVOPS environments Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 21/37] mac80211: fix driver RSSI event calculations Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 22/37] net: mvneta: Fix CPU_MAP registers initialisation Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 23/37] mwifiex: fix mwifiex_rdeeprom_read() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 24/37] staging: rtl8712: Add device ID for Sitecom WLA2100 Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 25/37] Bluetooth: hidp: fix device disconnect on idle timeout Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 26/37] Bluetooth: ath3k: Add new AR3012 0930:021c id Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 27/37] Bluetooth: ath3k: Add support of AR3012 0cf3:817b device Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 28/37] can: sja1000: clear interrupts on start Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 29/37] arm64: Fix compat register mappings Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 30/37] usblp: do not set TASK_INTERRUPTIBLE before lock Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 32/37] USB: ti_usb_3410_5052: Add Honeywell HGI80 ID Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 35/37] ALSA: usb-audio: add packet size quirk for the Medeli DD305 Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 36/37] ALSA: usb-audio: prevent CH345 multiport output SysEx corruption Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 37/37] ALSA: usb-audio: work around CH345 input " Greg Kroah-Hartman
2015-12-07 17:18 ` [PATCH 3.14 00/37] 3.14.58-stable review Shuah Khan
2015-12-07 21:16 ` Guenter Roeck
2015-12-09 3:26 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151207141743.729784230@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=fruggeri@arista.com \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).