netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
@ 2013-11-18  5:32 Salam Noureddine
  2013-11-18  7:42 ` Salam Noureddine
  0 siblings, 1 reply; 9+ messages in thread
From: Salam Noureddine @ 2013-11-18  5:32 UTC (permalink / raw)
  To: David S. Miller, Daniel Borkmann, Willem de Bruijn, Phil Sutter,
	Eric Dumazet, netdev
  Cc: Salam Noureddine

From: Salam Noureddine <noureddine@bs324.sjc.aristanetworks.com>

This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
("af-packet: Use existing netdev reference for bound sockets")

The patch introduced a race condition between packet_snd and
packet_notifier when a net_device is being unregistered. In the case of
a bound socket, packet_notifier can drop the last reference to the
net_device and packet_snd might end up sending a packet over a freed
net_device.

Signed-off-by: Salam Noureddine <noureddine@bs324.sjc.aristanetworks.com>
---
 net/packet/af_packet.c |   26 +++++++++++---------------
 1 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2e8286b..34fe9eb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2057,8 +2057,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
-	int err, reserve = 0;
+	int ifindex, err, reserve = 0;
 	void *ph;
 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
 	int tp_len, size_max;
@@ -2070,7 +2069,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	mutex_lock(&po->pg_vec_lock);
 
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		ifindex	= po->ifindex;
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2081,12 +2080,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 					+ offsetof(struct sockaddr_ll,
 						sll_addr)))
 			goto out;
+		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
-		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
+	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
@@ -2173,8 +2172,7 @@ out_status:
 	__packet_set_status(po, ph, status);
 	kfree_skb(skb);
 out_put:
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 out:
 	mutex_unlock(&po->pg_vec_lock);
 	return err;
@@ -2212,9 +2210,8 @@ static int packet_snd(struct socket *sock,
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	unsigned char *addr;
-	int err, reserve = 0;
+	int ifindex, err, reserve = 0;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int offset = 0;
 	int vnet_hdr_len;
@@ -2228,7 +2225,7 @@ static int packet_snd(struct socket *sock,
 	 */
 
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		ifindex	= po->ifindex;
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2237,12 +2234,12 @@ static int packet_snd(struct socket *sock,
 			goto out;
 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
 			goto out;
+		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
-		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
+	dev = dev_get_by_index(sock_net(sk), ifindex);
 	err = -ENXIO;
 	if (dev == NULL)
 		goto out_unlock;
@@ -2386,15 +2383,14 @@ static int packet_snd(struct socket *sock,
 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
 		goto out_unlock;
 
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 
 	return len;
 
 out_free:
 	kfree_skb(skb);
 out_unlock:
-	if (dev && need_rls_dev)
+	if (dev)
 		dev_put(dev);
 out:
 	return err;
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
@ 2013-11-18  7:40 Salam Noureddine
  2013-11-18  9:00 ` Daniel Borkmann
  0 siblings, 1 reply; 9+ messages in thread
From: Salam Noureddine @ 2013-11-18  7:40 UTC (permalink / raw)
  To: David S. Miller, Daniel Borkmann, Willem de Bruijn, Phil Sutter,
	Eric Dumazet, netdev
  Cc: Salam Noureddine

This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
("af-packet: Use existing netdev reference for bound sockets")

The patch introduced a race condition between packet_snd and
packet_notifier when a net_device is being unregistered. In the case of
a bound socket, packet_notifier can drop the last reference to the
net_device and packet_snd might end up sending a packet over a freed
net_device.

Signed-off-by: Salam Noureddine <noureddine@aristanetworks.com>
---
 net/packet/af_packet.c |   26 +++++++++++---------------
 1 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2e8286b..34fe9eb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2057,8 +2057,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
-	int err, reserve = 0;
+	int ifindex, err, reserve = 0;
 	void *ph;
 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
 	int tp_len, size_max;
@@ -2070,7 +2069,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	mutex_lock(&po->pg_vec_lock);
 
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		ifindex	= po->ifindex;
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2081,12 +2080,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 					+ offsetof(struct sockaddr_ll,
 						sll_addr)))
 			goto out;
+		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
-		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
+	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
@@ -2173,8 +2172,7 @@ out_status:
 	__packet_set_status(po, ph, status);
 	kfree_skb(skb);
 out_put:
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 out:
 	mutex_unlock(&po->pg_vec_lock);
 	return err;
@@ -2212,9 +2210,8 @@ static int packet_snd(struct socket *sock,
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	unsigned char *addr;
-	int err, reserve = 0;
+	int ifindex, err, reserve = 0;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int offset = 0;
 	int vnet_hdr_len;
@@ -2228,7 +2225,7 @@ static int packet_snd(struct socket *sock,
 	 */
 
 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		ifindex	= po->ifindex;
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2237,12 +2234,12 @@ static int packet_snd(struct socket *sock,
 			goto out;
 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
 			goto out;
+		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
-		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}
 
+	dev = dev_get_by_index(sock_net(sk), ifindex);
 	err = -ENXIO;
 	if (dev == NULL)
 		goto out_unlock;
@@ -2386,15 +2383,14 @@ static int packet_snd(struct socket *sock,
 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
 		goto out_unlock;
 
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 
 	return len;
 
 out_free:
 	kfree_skb(skb);
 out_unlock:
-	if (dev && need_rls_dev)
+	if (dev)
 		dev_put(dev);
 out:
 	return err;
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-18  5:32 Salam Noureddine
@ 2013-11-18  7:42 ` Salam Noureddine
  0 siblings, 0 replies; 9+ messages in thread
From: Salam Noureddine @ 2013-11-18  7:42 UTC (permalink / raw)
  To: David S. Miller, Daniel Borkmann, Willem de Bruijn, Phil Sutter,
	Eric Dumazet, netdev

Please ignore this version of the patch as it still has an issue with
the signed-off field. Already sent a corrected one.

Thanks,

Salam

On Sun, Nov 17, 2013 at 9:32 PM, Salam Noureddine
<noureddine@aristanetworks.com> wrote:
> From: Salam Noureddine <noureddine@bs324.sjc.aristanetworks.com>
>
> This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
> ("af-packet: Use existing netdev reference for bound sockets")
>
> The patch introduced a race condition between packet_snd and
> packet_notifier when a net_device is being unregistered. In the case of
> a bound socket, packet_notifier can drop the last reference to the
> net_device and packet_snd might end up sending a packet over a freed
> net_device.
>
> Signed-off-by: Salam Noureddine <noureddine@bs324.sjc.aristanetworks.com>
> ---
>  net/packet/af_packet.c |   26 +++++++++++---------------
>  1 files changed, 11 insertions(+), 15 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 2e8286b..34fe9eb 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -2057,8 +2057,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>         struct sk_buff *skb;
>         struct net_device *dev;
>         __be16 proto;
> -       bool need_rls_dev = false;
> -       int err, reserve = 0;
> +       int ifindex, err, reserve = 0;
>         void *ph;
>         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
>         int tp_len, size_max;
> @@ -2070,7 +2069,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>         mutex_lock(&po->pg_vec_lock);
>
>         if (saddr == NULL) {
> -               dev = po->prot_hook.dev;
> +               ifindex = po->ifindex;
>                 proto   = po->num;
>                 addr    = NULL;
>         } else {
> @@ -2081,12 +2080,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>                                         + offsetof(struct sockaddr_ll,
>                                                 sll_addr)))
>                         goto out;
> +               ifindex = saddr->sll_ifindex;
>                 proto   = saddr->sll_protocol;
>                 addr    = saddr->sll_addr;
> -               dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
> -               need_rls_dev = true;
>         }
>
> +       dev = dev_get_by_index(sock_net(&po->sk), ifindex);
>         err = -ENXIO;
>         if (unlikely(dev == NULL))
>                 goto out;
> @@ -2173,8 +2172,7 @@ out_status:
>         __packet_set_status(po, ph, status);
>         kfree_skb(skb);
>  out_put:
> -       if (need_rls_dev)
> -               dev_put(dev);
> +       dev_put(dev);
>  out:
>         mutex_unlock(&po->pg_vec_lock);
>         return err;
> @@ -2212,9 +2210,8 @@ static int packet_snd(struct socket *sock,
>         struct sk_buff *skb;
>         struct net_device *dev;
>         __be16 proto;
> -       bool need_rls_dev = false;
>         unsigned char *addr;
> -       int err, reserve = 0;
> +       int ifindex, err, reserve = 0;
>         struct virtio_net_hdr vnet_hdr = { 0 };
>         int offset = 0;
>         int vnet_hdr_len;
> @@ -2228,7 +2225,7 @@ static int packet_snd(struct socket *sock,
>          */
>
>         if (saddr == NULL) {
> -               dev = po->prot_hook.dev;
> +               ifindex = po->ifindex;
>                 proto   = po->num;
>                 addr    = NULL;
>         } else {
> @@ -2237,12 +2234,12 @@ static int packet_snd(struct socket *sock,
>                         goto out;
>                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
>                         goto out;
> +               ifindex = saddr->sll_ifindex;
>                 proto   = saddr->sll_protocol;
>                 addr    = saddr->sll_addr;
> -               dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
> -               need_rls_dev = true;
>         }
>
> +       dev = dev_get_by_index(sock_net(sk), ifindex);
>         err = -ENXIO;
>         if (dev == NULL)
>                 goto out_unlock;
> @@ -2386,15 +2383,14 @@ static int packet_snd(struct socket *sock,
>         if (err > 0 && (err = net_xmit_errno(err)) != 0)
>                 goto out_unlock;
>
> -       if (need_rls_dev)
> -               dev_put(dev);
> +       dev_put(dev);
>
>         return len;
>
>  out_free:
>         kfree_skb(skb);
>  out_unlock:
> -       if (dev && need_rls_dev)
> +       if (dev)
>                 dev_put(dev);
>  out:
>         return err;
> --
> 1.7.4.4
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-18  7:40 [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets" Salam Noureddine
@ 2013-11-18  9:00 ` Daniel Borkmann
  2013-11-18 18:03   ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Borkmann @ 2013-11-18  9:00 UTC (permalink / raw)
  To: Salam Noureddine
  Cc: David S. Miller, Willem de Bruijn, Phil Sutter, Eric Dumazet,
	netdev, Ben Greear

On 11/18/2013 08:40 AM, Salam Noureddine wrote:
> This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
> ("af-packet: Use existing netdev reference for bound sockets")
>
> The patch introduced a race condition between packet_snd and
> packet_notifier when a net_device is being unregistered. In the case of
> a bound socket, packet_notifier can drop the last reference to the
> net_device and packet_snd might end up sending a packet over a freed
> net_device.

So there's no other workaround possible like e.g. setting a flag in
struct packet_sock so that in case our netdevice goes down, we just
set the flag and if set, we return with -ENXIO in send path?
Reverting this would decrease performance for everyone as we would
then do the lookup every time we send a packet again.

> Signed-off-by: Salam Noureddine <noureddine@aristanetworks.com>
> ---
>   net/packet/af_packet.c |   26 +++++++++++---------------
>   1 files changed, 11 insertions(+), 15 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 2e8286b..34fe9eb 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -2057,8 +2057,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   	struct sk_buff *skb;
>   	struct net_device *dev;
>   	__be16 proto;
> -	bool need_rls_dev = false;
> -	int err, reserve = 0;
> +	int ifindex, err, reserve = 0;
>   	void *ph;
>   	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
>   	int tp_len, size_max;
> @@ -2070,7 +2069,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   	mutex_lock(&po->pg_vec_lock);
>
>   	if (saddr == NULL) {
> -		dev = po->prot_hook.dev;
> +		ifindex	= po->ifindex;
>   		proto	= po->num;
>   		addr	= NULL;
>   	} else {
> @@ -2081,12 +2080,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>   					+ offsetof(struct sockaddr_ll,
>   						sll_addr)))
>   			goto out;
> +		ifindex	= saddr->sll_ifindex;
>   		proto	= saddr->sll_protocol;
>   		addr	= saddr->sll_addr;
> -		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
> -		need_rls_dev = true;
>   	}
>
> +	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
>   	err = -ENXIO;
>   	if (unlikely(dev == NULL))
>   		goto out;
> @@ -2173,8 +2172,7 @@ out_status:
>   	__packet_set_status(po, ph, status);
>   	kfree_skb(skb);
>   out_put:
> -	if (need_rls_dev)
> -		dev_put(dev);
> +	dev_put(dev);
>   out:
>   	mutex_unlock(&po->pg_vec_lock);
>   	return err;
> @@ -2212,9 +2210,8 @@ static int packet_snd(struct socket *sock,
>   	struct sk_buff *skb;
>   	struct net_device *dev;
>   	__be16 proto;
> -	bool need_rls_dev = false;
>   	unsigned char *addr;
> -	int err, reserve = 0;
> +	int ifindex, err, reserve = 0;
>   	struct virtio_net_hdr vnet_hdr = { 0 };
>   	int offset = 0;
>   	int vnet_hdr_len;
> @@ -2228,7 +2225,7 @@ static int packet_snd(struct socket *sock,
>   	 */
>
>   	if (saddr == NULL) {
> -		dev = po->prot_hook.dev;
> +		ifindex	= po->ifindex;
>   		proto	= po->num;
>   		addr	= NULL;
>   	} else {
> @@ -2237,12 +2234,12 @@ static int packet_snd(struct socket *sock,
>   			goto out;
>   		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
>   			goto out;
> +		ifindex	= saddr->sll_ifindex;
>   		proto	= saddr->sll_protocol;
>   		addr	= saddr->sll_addr;
> -		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
> -		need_rls_dev = true;
>   	}
>
> +	dev = dev_get_by_index(sock_net(sk), ifindex);
>   	err = -ENXIO;
>   	if (dev == NULL)
>   		goto out_unlock;
> @@ -2386,15 +2383,14 @@ static int packet_snd(struct socket *sock,
>   	if (err > 0 && (err = net_xmit_errno(err)) != 0)
>   		goto out_unlock;
>
> -	if (need_rls_dev)
> -		dev_put(dev);
> +	dev_put(dev);
>
>   	return len;
>
>   out_free:
>   	kfree_skb(skb);
>   out_unlock:
> -	if (dev && need_rls_dev)
> +	if (dev)
>   		dev_put(dev);
>   out:
>   	return err;
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-18  9:00 ` Daniel Borkmann
@ 2013-11-18 18:03   ` David Miller
  2013-11-18 23:39     ` Daniel Borkmann
  0 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2013-11-18 18:03 UTC (permalink / raw)
  To: dborkman; +Cc: noureddine, willemb, phil, edumazet, netdev, greearb

From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 18 Nov 2013 10:00:53 +0100

> On 11/18/2013 08:40 AM, Salam Noureddine wrote:
>> This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
>> ("af-packet: Use existing netdev reference for bound sockets")
>>
>> The patch introduced a race condition between packet_snd and
>> packet_notifier when a net_device is being unregistered. In the case
>> of
>> a bound socket, packet_notifier can drop the last reference to the
>> net_device and packet_snd might end up sending a packet over a freed
>> net_device.
> 
> So there's no other workaround possible like e.g. setting a flag in
> struct packet_sock so that in case our netdevice goes down, we just
> set the flag and if set, we return with -ENXIO in send path?
> Reverting this would decrease performance for everyone as we would
> then do the lookup every time we send a packet again.

Agreed, we should try first to find a reasonable fix instead of just
doing a knee-jerk revert of this change.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-18 18:03   ` David Miller
@ 2013-11-18 23:39     ` Daniel Borkmann
  2013-11-19  0:17       ` Salam Noureddine
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Borkmann @ 2013-11-18 23:39 UTC (permalink / raw)
  To: David Miller; +Cc: noureddine, willemb, phil, edumazet, netdev, greearb

On 11/18/2013 07:03 PM, David Miller wrote:
> From: Daniel Borkmann <dborkman@redhat.com>
> Date: Mon, 18 Nov 2013 10:00:53 +0100
>
>> On 11/18/2013 08:40 AM, Salam Noureddine wrote:
>>> This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
>>> ("af-packet: Use existing netdev reference for bound sockets")
>>>
>>> The patch introduced a race condition between packet_snd and
>>> packet_notifier when a net_device is being unregistered. In the case
>>> of
>>> a bound socket, packet_notifier can drop the last reference to the
>>> net_device and packet_snd might end up sending a packet over a freed
>>> net_device.
>>
>> So there's no other workaround possible like e.g. setting a flag in
>> struct packet_sock so that in case our netdevice goes down, we just
>> set the flag and if set, we return with -ENXIO in send path?
>> Reverting this would decrease performance for everyone as we would
>> then do the lookup every time we send a packet again.
>
> Agreed, we should try first to find a reasonable fix instead of just
> doing a knee-jerk revert of this change.

Salam, could you give the below a try on your side ?

I tried reproducing this with trafgen, but couldn't so far, meaning
everything worked (before/after). Having that said, it could also be
that I'm currently just having a really crappy nic (asix) at hand.

Let me know, thanks !

 From 26caa771c0c7d53b28afafc76f1b91ab36a34e73 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 18 Nov 2013 23:31:11 +0100
Subject: [PATCH] packet: don't send packets after we unregistered proto hook

Salam reported a use after free bug in PF_PACKET that occurs when
we're sending out frames on a socket bound device and suddenly the
netdevice is being unregistered.

Salam says:

   Commit 827d9780 introduced a race condition between packet_snd
   and packet_notifier when a net_device is being unregistered. In
   the case of a bound socket, packet_notifier can drop the last
   reference to the net_device and packet_snd might end up sending
   a packet over a freed net_device.

To avoid reverting 827d9780, we could make use of po->running member
that gets reset when we're calling __unregister_prot_hook() in
packet_notifier() when we receive NETDEV_DOWN notification. In
that case we receive NETDEV_DOWN before NETDEV_UNREGISTER where
prot_hook.dev is set to NULL, so that we could make sure to
leave send path early with -ENETDOWN, which corresponds also
to our notification.

Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.")
Reported-by: Salam Noureddine <noureddine@aristanetworks.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
---
  net/packet/af_packet.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2e8286b..e3f64f2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2094,7 +2094,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
  	reserve = dev->hard_header_len;

  	err = -ENETDOWN;
-	if (unlikely(!(dev->flags & IFF_UP)))
+	if (unlikely(!(dev->flags & IFF_UP) || !po->running))
  		goto out_put;

  	size_max = po->tx_ring.frame_size
@@ -2250,7 +2250,7 @@ static int packet_snd(struct socket *sock,
  		reserve = dev->hard_header_len;

  	err = -ENETDOWN;
-	if (!(dev->flags & IFF_UP))
+	if (unlikely(!(dev->flags & IFF_UP) || !po->running))
  		goto out_unlock;

  	if (po->has_vnet_hdr) {
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-18 23:39     ` Daniel Borkmann
@ 2013-11-19  0:17       ` Salam Noureddine
  2013-11-19  1:26         ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Salam Noureddine @ 2013-11-19  0:17 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: David Miller, Willem de Bruijn, Phil Sutter, Eric Dumazet, netdev,
	greearb

Hi Daniel,

Thanks for looking into this. It seems to me that just using a flag
won't help in avoiding the race condition
since packet_notifier can still run after the flag check. We need to
have some sort of locking to avoid it.
One possibility is to grab the po->bind_lock (which is taken in
packet_notifier) in packet_snd for bound
sockets, increment the refcnt for the net_device using dev_hold and
release the lock. Not sure if this is
too expensive though and would defeat the purpose of the original
patch. Another problem could be that it
might delay device unregistering if somebody is sending a lot of
packets on the socket.

Please let me know if I am missing something here and I would be happy
to try the patch if my analysis
is wrong.

Thanks,

Salam

On Mon, Nov 18, 2013 at 3:39 PM, Daniel Borkmann <dborkman@redhat.com> wrote:
> On 11/18/2013 07:03 PM, David Miller wrote:
>>
>> From: Daniel Borkmann <dborkman@redhat.com>
>> Date: Mon, 18 Nov 2013 10:00:53 +0100
>>
>>> On 11/18/2013 08:40 AM, Salam Noureddine wrote:
>>>>
>>>> This reverts commit 827d978037d7d0bf0860481948c6d26ead10042f
>>>> ("af-packet: Use existing netdev reference for bound sockets")
>>>>
>>>> The patch introduced a race condition between packet_snd and
>>>> packet_notifier when a net_device is being unregistered. In the case
>>>> of
>>>> a bound socket, packet_notifier can drop the last reference to the
>>>> net_device and packet_snd might end up sending a packet over a freed
>>>> net_device.
>>>
>>>
>>> So there's no other workaround possible like e.g. setting a flag in
>>> struct packet_sock so that in case our netdevice goes down, we just
>>> set the flag and if set, we return with -ENXIO in send path?
>>> Reverting this would decrease performance for everyone as we would
>>> then do the lookup every time we send a packet again.
>>
>>
>> Agreed, we should try first to find a reasonable fix instead of just
>> doing a knee-jerk revert of this change.
>
>
> Salam, could you give the below a try on your side ?
>
> I tried reproducing this with trafgen, but couldn't so far, meaning
> everything worked (before/after). Having that said, it could also be
> that I'm currently just having a really crappy nic (asix) at hand.
>
> Let me know, thanks !
>
> From 26caa771c0c7d53b28afafc76f1b91ab36a34e73 Mon Sep 17 00:00:00 2001
> From: Daniel Borkmann <dborkman@redhat.com>
> Date: Mon, 18 Nov 2013 23:31:11 +0100
> Subject: [PATCH] packet: don't send packets after we unregistered proto hook
>
> Salam reported a use after free bug in PF_PACKET that occurs when
> we're sending out frames on a socket bound device and suddenly the
> netdevice is being unregistered.
>
> Salam says:
>
>   Commit 827d9780 introduced a race condition between packet_snd
>
>   and packet_notifier when a net_device is being unregistered. In
>   the case of a bound socket, packet_notifier can drop the last
>   reference to the net_device and packet_snd might end up sending
>   a packet over a freed net_device.
>
> To avoid reverting 827d9780, we could make use of po->running member
> that gets reset when we're calling __unregister_prot_hook() in
> packet_notifier() when we receive NETDEV_DOWN notification. In
> that case we receive NETDEV_DOWN before NETDEV_UNREGISTER where
> prot_hook.dev is set to NULL, so that we could make sure to
> leave send path early with -ENETDOWN, which corresponds also
> to our notification.
>
> Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound
> sockets.")
> Reported-by: Salam Noureddine <noureddine@aristanetworks.com>
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
> ---
>  net/packet/af_packet.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 2e8286b..e3f64f2 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -2094,7 +2094,7 @@ static int tpacket_snd(struct packet_sock *po, struct
> msghdr *msg)
>         reserve = dev->hard_header_len;
>
>         err = -ENETDOWN;
> -       if (unlikely(!(dev->flags & IFF_UP)))
> +       if (unlikely(!(dev->flags & IFF_UP) || !po->running))
>                 goto out_put;
>
>         size_max = po->tx_ring.frame_size
> @@ -2250,7 +2250,7 @@ static int packet_snd(struct socket *sock,
>                 reserve = dev->hard_header_len;
>
>         err = -ENETDOWN;
> -       if (!(dev->flags & IFF_UP))
> +       if (unlikely(!(dev->flags & IFF_UP) || !po->running))
>                 goto out_unlock;
>
>         if (po->has_vnet_hdr) {
> --
> 1.8.3.1
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-19  0:17       ` Salam Noureddine
@ 2013-11-19  1:26         ` David Miller
  2013-11-19  1:30           ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2013-11-19  1:26 UTC (permalink / raw)
  To: noureddine; +Cc: dborkman, willemb, phil, edumazet, netdev, greearb

From: Salam Noureddine <noureddine@aristanetworks.com>
Date: Mon, 18 Nov 2013 16:17:02 -0800

> Thanks for looking into this. It seems to me that just using a flag
> won't help in avoiding the race condition since packet_notifier can
> still run after the flag check.

The bind_lock syncronizes everything.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets"
  2013-11-19  1:26         ` David Miller
@ 2013-11-19  1:30           ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2013-11-19  1:30 UTC (permalink / raw)
  To: noureddine; +Cc: dborkman, willemb, phil, edumazet, netdev, greearb

From: David Miller <davem@davemloft.net>
Date: Mon, 18 Nov 2013 20:26:58 -0500 (EST)

> From: Salam Noureddine <noureddine@aristanetworks.com>
> Date: Mon, 18 Nov 2013 16:17:02 -0800
> 
>> Thanks for looking into this. It seems to me that just using a flag
>> won't help in avoiding the race condition since packet_notifier can
>> still run after the flag check.
> 
> The bind_lock syncronizes everything.

Well, it should, but it doesn't here.

So what we need is something that synchronizes the packet_notifier with
the sendmsg path, and will make sure the po->running state is stable
throughout the call.

That's all.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-11-19  1:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-11-18  7:40 [PATCH 1/1] Revert "af-packet: Use existing netdev reference for bound sockets" Salam Noureddine
2013-11-18  9:00 ` Daniel Borkmann
2013-11-18 18:03   ` David Miller
2013-11-18 23:39     ` Daniel Borkmann
2013-11-19  0:17       ` Salam Noureddine
2013-11-19  1:26         ` David Miller
2013-11-19  1:30           ` David Miller
  -- strict thread matches above, loose matches on Subject: below --
2013-11-18  5:32 Salam Noureddine
2013-11-18  7:42 ` Salam Noureddine

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).