From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Francesco Ruggeri <fruggeri@arista.com>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.14 09/37] packet: race condition in packet_bind
Date: Mon, 7 Dec 2015 09:26:21 -0500 [thread overview]
Message-ID: <20151207141743.729784230@linuxfoundation.org> (raw)
In-Reply-To: <20151207141743.221453847@linuxfoundation.org>
3.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Francesco Ruggeri <fruggeri@aristanetworks.com>
[ Upstream commit 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 ]
There is a race conditions between packet_notifier and packet_bind{_spkt}.
It happens if packet_notifier(NETDEV_UNREGISTER) executes between the
time packet_bind{_spkt} takes a reference on the new netdevice and the
time packet_do_bind sets po->ifindex.
In this case the notification can be missed.
If this happens during a dev_change_net_namespace this can result in the
netdevice to be moved to the new namespace while the packet_sock in the
old namespace still holds a reference on it. When the netdevice is later
deleted in the new namespace the deletion hangs since the packet_sock
is not found in the new namespace' &net->packet.sklist.
It can be reproduced with the script below.
This patch makes packet_do_bind check again for the presence of the
netdevice in the packet_sock's namespace after the synchronize_net
in unregister_prot_hook.
More in general it also uses the rcu lock for the duration of the bind
to stop dev_change_net_namespace/rollback_registered_many from
going past the synchronize_net following unlist_netdevice, so that
no NETDEV_UNREGISTER notifications can happen on the new netdevice
while the bind is executing. In order to do this some code from
packet_bind{_spkt} is consolidated into packet_do_dev.
import socket, os, time, sys
proto=7
realDev='em1'
vlanId=400
if len(sys.argv) > 1:
vlanId=int(sys.argv[1])
dev='vlan%d' % vlanId
os.system('taskset -p 0x10 %d' % os.getpid())
s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto)
os.system('ip link add link %s name %s type vlan id %d' %
(realDev, dev, vlanId))
os.system('ip netns add dummy')
pid=os.fork()
if pid == 0:
# dev should be moved while packet_do_bind is in synchronize net
os.system('taskset -p 0x20000 %d' % os.getpid())
os.system('ip link set %s netns dummy' % dev)
os.system('ip netns exec dummy ip link del %s' % dev)
s.close()
sys.exit(0)
time.sleep(.004)
try:
s.bind(('%s' % dev, proto+1))
except:
print 'Could not bind socket'
s.close()
os.system('ip netns del dummy')
sys.exit(0)
os.waitpid(pid, 0)
s.close()
os.system('ip netns del dummy')
sys.exit(0)
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/packet/af_packet.c | 80 ++++++++++++++++++++++++++++++-------------------
1 file changed, 49 insertions(+), 31 deletions(-)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2642,22 +2642,40 @@ static int packet_release(struct socket
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
+static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
+ __be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
struct net_device *dev_curr;
__be16 proto_curr;
bool need_rehook;
+ struct net_device *dev = NULL;
+ int ret = 0;
+ bool unlisted = false;
- if (po->fanout) {
- if (dev)
- dev_put(dev);
-
+ if (po->fanout)
return -EINVAL;
- }
lock_sock(sk);
spin_lock(&po->bind_lock);
+ rcu_read_lock();
+
+ if (name) {
+ dev = dev_get_by_name_rcu(sock_net(sk), name);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ } else if (ifindex) {
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ if (dev)
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
@@ -2665,14 +2683,29 @@ static int packet_do_bind(struct sock *s
need_rehook = proto_curr != proto || dev_curr != dev;
if (need_rehook) {
- unregister_prot_hook(sk, true);
+ if (po->running) {
+ rcu_read_unlock();
+ __unregister_prot_hook(sk, true);
+ rcu_read_lock();
+ dev_curr = po->prot_hook.dev;
+ if (dev)
+ unlisted = !dev_get_by_index_rcu(sock_net(sk),
+ dev->ifindex);
+ }
po->num = proto;
po->prot_hook.type = proto;
- po->prot_hook.dev = dev;
- po->ifindex = dev ? dev->ifindex : 0;
- packet_cached_dev_assign(po, dev);
+ if (unlikely(unlisted)) {
+ dev_put(dev);
+ po->prot_hook.dev = NULL;
+ po->ifindex = -1;
+ packet_cached_dev_reset(po);
+ } else {
+ po->prot_hook.dev = dev;
+ po->ifindex = dev ? dev->ifindex : 0;
+ packet_cached_dev_assign(po, dev);
+ }
}
if (dev_curr)
dev_put(dev_curr);
@@ -2680,7 +2713,7 @@ static int packet_do_bind(struct sock *s
if (proto == 0 || !need_rehook)
goto out_unlock;
- if (!dev || (dev->flags & IFF_UP)) {
+ if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
register_prot_hook(sk);
} else {
sk->sk_err = ENETDOWN;
@@ -2689,9 +2722,10 @@ static int packet_do_bind(struct sock *s
}
out_unlock:
+ rcu_read_unlock();
spin_unlock(&po->bind_lock);
release_sock(sk);
- return 0;
+ return ret;
}
/*
@@ -2703,8 +2737,6 @@ static int packet_bind_spkt(struct socke
{
struct sock *sk = sock->sk;
char name[15];
- struct net_device *dev;
- int err = -ENODEV;
/*
* Check legality
@@ -2714,19 +2746,13 @@ static int packet_bind_spkt(struct socke
return -EINVAL;
strlcpy(name, uaddr->sa_data, sizeof(name));
- dev = dev_get_by_name(sock_net(sk), name);
- if (dev)
- err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
- return err;
+ return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
struct sock *sk = sock->sk;
- struct net_device *dev = NULL;
- int err;
-
/*
* Check legality
@@ -2737,16 +2763,8 @@ static int packet_bind(struct socket *so
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- if (sll->sll_ifindex) {
- err = -ENODEV;
- dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
- if (dev == NULL)
- goto out;
- }
- err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
-
-out:
- return err;
+ return packet_do_bind(sk, NULL, sll->sll_ifindex,
+ sll->sll_protocol ? : pkt_sk(sk)->num);
}
static struct proto packet_proto = {
next prev parent reply other threads:[~2015-12-07 14:33 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-07 14:26 [PATCH 3.14 00/37] 3.14.58-stable review Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 02/37] macvtap: unbreak receiving of gro skb with frag list Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 03/37] ppp: fix pppoe_dev deletion condition in pppoe_release() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 04/37] RDS-TCP: Recover correctly from pskb_pull()/pksb_trim() failure in rds_tcp_data_recv Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 05/37] net/mlx4: Copy/set only sizeof struct mlx4_eqe bytes Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 06/37] stmmac: Correctly report PTP capabilities Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 07/37] ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 08/37] net: avoid NULL deref in inet_ctl_sock_destroy() Greg Kroah-Hartman
2015-12-07 14:26 ` Greg Kroah-Hartman [this message]
2015-12-07 14:26 ` [PATCH 3.14 10/37] net: fix a race in dst_release() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 11/37] virtio-net: drop NETIF_F_FRAGLIST Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 12/37] RDS: verify the underlying transport exists before creating a connection Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 13/37] ARM: 8426/1: dma-mapping: add missing range check in dma_mmap() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 14/37] ARM: 8427/1: dma-mapping: add support for offset parameter " Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 15/37] ARM: orion: Fix DSA platform device after mvmdio conversion Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 16/37] ARM: common: edma: Fix channel parameter for irq callbacks Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 17/37] x86/setup: Extend low identity map to cover whole kernel range Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 18/37] x86/setup: Fix low identity map for >= 2GB " Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 19/37] x86/cpu: Call verify_cpu() after having entered long mode too Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 20/37] x86/cpu: Fix SMAP check in PVOPS environments Greg Kroah-Hartman
2015-12-07 14:26 ` Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 21/37] mac80211: fix driver RSSI event calculations Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 22/37] net: mvneta: Fix CPU_MAP registers initialisation Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 23/37] mwifiex: fix mwifiex_rdeeprom_read() Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 24/37] staging: rtl8712: Add device ID for Sitecom WLA2100 Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 25/37] Bluetooth: hidp: fix device disconnect on idle timeout Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 26/37] Bluetooth: ath3k: Add new AR3012 0930:021c id Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 27/37] Bluetooth: ath3k: Add support of AR3012 0cf3:817b device Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 28/37] can: sja1000: clear interrupts on start Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 29/37] arm64: Fix compat register mappings Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 30/37] usblp: do not set TASK_INTERRUPTIBLE before lock Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 32/37] USB: ti_usb_3410_5052: Add Honeywell HGI80 ID Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 35/37] ALSA: usb-audio: add packet size quirk for the Medeli DD305 Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 36/37] ALSA: usb-audio: prevent CH345 multiport output SysEx corruption Greg Kroah-Hartman
2015-12-07 14:26 ` [PATCH 3.14 37/37] ALSA: usb-audio: work around CH345 input " Greg Kroah-Hartman
2015-12-07 17:18 ` [PATCH 3.14 00/37] 3.14.58-stable review Shuah Khan
2015-12-07 21:16 ` Guenter Roeck
2015-12-09 3:26 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151207141743.729784230@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=fruggeri@arista.com \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.