netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section
@ 2025-05-20 20:20 Stanislav Fomichev
  2025-05-21  2:41 ` Willem de Bruijn
  0 siblings, 1 reply; 5+ messages in thread
From: Stanislav Fomichev @ 2025-05-20 20:20 UTC (permalink / raw)
  To: netdev
  Cc: davem, edumazet, kuba, pabeni, willemdebruijn.kernel, horms,
	stfomichev, linux-kernel, syzbot+b191b5ccad8d7a986286

Calling `PACKET_ADD_MEMBERSHIP` on an ops-locked device can trigger
the `NETDEV_UNREGISTER` notifier, which may require disabling promiscuous
and/or allmulti mode. Both of these operations require acquiring the netdev
instance lock. Move the call to `packet_dev_mc` outside of the RCU critical
section.

Closes: https://syzkaller.appspot.com/bug?extid=b191b5ccad8d7a986286
Reported-by: syzbot+b191b5ccad8d7a986286@syzkaller.appspotmail.com
Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations")
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
---
 net/packet/af_packet.c | 20 +++++++++++++++-----
 net/packet/internal.h  |  1 +
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d4dba06297c3..5a6132816b2e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3713,15 +3713,15 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 }
 
 static void packet_dev_mclist_delete(struct net_device *dev,
-				     struct packet_mclist **mlp)
+				     struct packet_mclist **mlp,
+				     struct list_head *list)
 {
 	struct packet_mclist *ml;
 
 	while ((ml = *mlp) != NULL) {
 		if (ml->ifindex == dev->ifindex) {
-			packet_dev_mc(dev, ml, -1);
+			list_add(&ml->remove_list, list);
 			*mlp = ml->next;
-			kfree(ml);
 		} else
 			mlp = &ml->next;
 	}
@@ -4233,9 +4233,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 static int packet_notifier(struct notifier_block *this,
 			   unsigned long msg, void *ptr)
 {
-	struct sock *sk;
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
+	struct packet_mclist *ml, *tmp;
+	LIST_HEAD(mclist);
+	struct sock *sk;
 
 	rcu_read_lock();
 	sk_for_each_rcu(sk, &net->packet.sklist) {
@@ -4244,7 +4246,8 @@ static int packet_notifier(struct notifier_block *this,
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 			if (po->mclist)
-				packet_dev_mclist_delete(dev, &po->mclist);
+				packet_dev_mclist_delete(dev, &po->mclist,
+							 &mclist);
 			fallthrough;
 
 		case NETDEV_DOWN:
@@ -4277,6 +4280,13 @@ static int packet_notifier(struct notifier_block *this,
 		}
 	}
 	rcu_read_unlock();
+
+	/* packet_dev_mc might grab instance locks so can't run under rcu */
+	list_for_each_entry_safe(ml, tmp, &mclist, remove_list) {
+		packet_dev_mc(dev, ml, -1);
+		kfree(ml);
+	}
+
 	return NOTIFY_DONE;
 }
 
diff --git a/net/packet/internal.h b/net/packet/internal.h
index d5d70712007a..1e743d0316fd 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -11,6 +11,7 @@ struct packet_mclist {
 	unsigned short		type;
 	unsigned short		alen;
 	unsigned char		addr[MAX_ADDR_LEN];
+	struct list_head	remove_list;
 };
 
 /* kbdq - kernel block descriptor queue */
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section
  2025-05-20 20:20 [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section Stanislav Fomichev
@ 2025-05-21  2:41 ` Willem de Bruijn
  2025-05-21  3:00   ` Jakub Kicinski
  0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2025-05-21  2:41 UTC (permalink / raw)
  To: Stanislav Fomichev, netdev
  Cc: davem, edumazet, kuba, pabeni, willemdebruijn.kernel, horms,
	stfomichev, linux-kernel, syzbot+b191b5ccad8d7a986286

Stanislav Fomichev wrote:
> Calling `PACKET_ADD_MEMBERSHIP` on an ops-locked device can trigger
> the `NETDEV_UNREGISTER` notifier,

This made it sound to me as if the notifier is called as a result of
the setsockopt.

If respinning, please rewrite to make clear that the two are
independent events. The stack trace in the bug also makes clear
that the notifier trigger is a device going away.

> which may require disabling promiscuous
> and/or allmulti mode. Both of these operations require acquiring the netdev
> instance lock. Move the call to `packet_dev_mc` outside of the RCU critical
> section.
> 
> Closes: https://syzkaller.appspot.com/bug?extid=b191b5ccad8d7a986286
> Reported-by: syzbot+b191b5ccad8d7a986286@syzkaller.appspotmail.com
> Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations")
> Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
> ---
>  net/packet/af_packet.c | 20 +++++++++++++++-----
>  net/packet/internal.h  |  1 +
>  2 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index d4dba06297c3..5a6132816b2e 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -3713,15 +3713,15 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
>  }
>  
>  static void packet_dev_mclist_delete(struct net_device *dev,
> -				     struct packet_mclist **mlp)
> +				     struct packet_mclist **mlp,
> +				     struct list_head *list)
>  {
>  	struct packet_mclist *ml;
>  
>  	while ((ml = *mlp) != NULL) {
>  		if (ml->ifindex == dev->ifindex) {
> -			packet_dev_mc(dev, ml, -1);
> +			list_add(&ml->remove_list, list);
>  			*mlp = ml->next;
> -			kfree(ml);
>  		} else
>  			mlp = &ml->next;
>  	}
> @@ -4233,9 +4233,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
>  static int packet_notifier(struct notifier_block *this,
>  			   unsigned long msg, void *ptr)
>  {
> -	struct sock *sk;
>  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
>  	struct net *net = dev_net(dev);
> +	struct packet_mclist *ml, *tmp;
> +	LIST_HEAD(mclist);
> +	struct sock *sk;
>  
>  	rcu_read_lock();
>  	sk_for_each_rcu(sk, &net->packet.sklist) {
> @@ -4244,7 +4246,8 @@ static int packet_notifier(struct notifier_block *this,
>  		switch (msg) {
>  		case NETDEV_UNREGISTER:
>  			if (po->mclist)
> -				packet_dev_mclist_delete(dev, &po->mclist);
> +				packet_dev_mclist_delete(dev, &po->mclist,
> +							 &mclist);
>  			fallthrough;
>  
>  		case NETDEV_DOWN:
> @@ -4277,6 +4280,13 @@ static int packet_notifier(struct notifier_block *this,
>  		}
>  	}
>  	rcu_read_unlock();
> +
> +	/* packet_dev_mc might grab instance locks so can't run under rcu */
> +	list_for_each_entry_safe(ml, tmp, &mclist, remove_list) {
> +		packet_dev_mc(dev, ml, -1);
> +		kfree(ml);
> +	}
> +

Just verifying my understanding of the not entirely obvious locking:

po->mclist modifications (add, del, flush, unregister) are all
protected by the RTNL, not the RCU. The RCU only protects the sklist
and by extension the sks on it. So moving the mclist operations out of
the RCU is fine.

The delayed operation on the mclist entry is still within the RTNL
from unregister_netdevice_notifier. Which matter as it protects not
only the list, but also the actual operations in packet_dev_mc, such
as inc/dec on dev->promiscuity and associated dev_change_rx_flags.
And new packet_mclist.remove_list too.

>  	return NOTIFY_DONE;
>  }
>  
> diff --git a/net/packet/internal.h b/net/packet/internal.h
> index d5d70712007a..1e743d0316fd 100644
> --- a/net/packet/internal.h
> +++ b/net/packet/internal.h
> @@ -11,6 +11,7 @@ struct packet_mclist {
>  	unsigned short		type;
>  	unsigned short		alen;
>  	unsigned char		addr[MAX_ADDR_LEN];
> +	struct list_head	remove_list;

INIT_LIST_HEAD on alloc in packet_mc_add?

>  };
>  
>  /* kbdq - kernel block descriptor queue */
> -- 
> 2.49.0
> 



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section
  2025-05-21  2:41 ` Willem de Bruijn
@ 2025-05-21  3:00   ` Jakub Kicinski
  2025-05-21  3:53     ` Willem de Bruijn
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Kicinski @ 2025-05-21  3:00 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Stanislav Fomichev, netdev, davem, edumazet, pabeni, horms,
	linux-kernel, syzbot+b191b5ccad8d7a986286

On Tue, 20 May 2025 22:41:30 -0400 Willem de Bruijn wrote:
> > @@ -4277,6 +4280,13 @@ static int packet_notifier(struct notifier_block *this,
> >  		}
> >  	}
> >  	rcu_read_unlock();
> > +
> > +	/* packet_dev_mc might grab instance locks so can't run under rcu */
> > +	list_for_each_entry_safe(ml, tmp, &mclist, remove_list) {
> > +		packet_dev_mc(dev, ml, -1);
> > +		kfree(ml);
> > +	}
> > +  
> 
> Just verifying my understanding of the not entirely obvious locking:
> 
> po->mclist modifications (add, del, flush, unregister) are all
> protected by the RTNL, not the RCU. The RCU only protects the sklist
> and by extension the sks on it. So moving the mclist operations out of
> the RCU is fine.
> 
> The delayed operation on the mclist entry is still within the RTNL
> from unregister_netdevice_notifier. Which matter as it protects not
> only the list, but also the actual operations in packet_dev_mc, such
> as inc/dec on dev->promiscuity and associated dev_change_rx_flags.
> And new packet_mclist.remove_list too.

Matches my understanding FWIW, but this will be a great addition 
to the commit message. Let's add it in v2..

> >  	return NOTIFY_DONE;
> >  }
> >  
> > diff --git a/net/packet/internal.h b/net/packet/internal.h
> > index d5d70712007a..1e743d0316fd 100644
> > --- a/net/packet/internal.h
> > +++ b/net/packet/internal.h
> > @@ -11,6 +11,7 @@ struct packet_mclist {
> >  	unsigned short		type;
> >  	unsigned short		alen;
> >  	unsigned char		addr[MAX_ADDR_LEN];
> > +	struct list_head	remove_list;  
> 
> INIT_LIST_HEAD on alloc in packet_mc_add?

Just to be clear this is an "entry node" not a "head node",
is it common to init "entry nodes"? 
-- 
for the commit msg:
pw-bot: cr

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section
  2025-05-21  3:00   ` Jakub Kicinski
@ 2025-05-21  3:53     ` Willem de Bruijn
  2025-05-21 16:54       ` Stanislav Fomichev
  0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2025-05-21  3:53 UTC (permalink / raw)
  To: Jakub Kicinski, Willem de Bruijn
  Cc: Stanislav Fomichev, netdev, davem, edumazet, pabeni, horms,
	linux-kernel, syzbot+b191b5ccad8d7a986286

Jakub Kicinski wrote:
> On Tue, 20 May 2025 22:41:30 -0400 Willem de Bruijn wrote:
> > > @@ -4277,6 +4280,13 @@ static int packet_notifier(struct notifier_block *this,
> > >  		}
> > >  	}
> > >  	rcu_read_unlock();
> > > +
> > > +	/* packet_dev_mc might grab instance locks so can't run under rcu */
> > > +	list_for_each_entry_safe(ml, tmp, &mclist, remove_list) {
> > > +		packet_dev_mc(dev, ml, -1);
> > > +		kfree(ml);
> > > +	}
> > > +  
> > 
> > Just verifying my understanding of the not entirely obvious locking:
> > 
> > po->mclist modifications (add, del, flush, unregister) are all
> > protected by the RTNL, not the RCU. The RCU only protects the sklist
> > and by extension the sks on it. So moving the mclist operations out of
> > the RCU is fine.
> > 
> > The delayed operation on the mclist entry is still within the RTNL
> > from unregister_netdevice_notifier. Which matter as it protects not
> > only the list, but also the actual operations in packet_dev_mc, such
> > as inc/dec on dev->promiscuity and associated dev_change_rx_flags.
> > And new packet_mclist.remove_list too.
> 
> Matches my understanding FWIW, but this will be a great addition 
> to the commit message. Let's add it in v2..
> 
> > >  	return NOTIFY_DONE;
> > >  }
> > >  
> > > diff --git a/net/packet/internal.h b/net/packet/internal.h
> > > index d5d70712007a..1e743d0316fd 100644
> > > --- a/net/packet/internal.h
> > > +++ b/net/packet/internal.h
> > > @@ -11,6 +11,7 @@ struct packet_mclist {
> > >  	unsigned short		type;
> > >  	unsigned short		alen;
> > >  	unsigned char		addr[MAX_ADDR_LEN];
> > > +	struct list_head	remove_list;  
> > 
> > INIT_LIST_HEAD on alloc in packet_mc_add?
> 
> Just to be clear this is an "entry node" not a "head node",
> is it common to init "entry nodes"? 

I wasn't sure. A small sample from net/core showed that many do, e.g.,
napi->poll_list. But not all, e.g., failover->list just calls
list_add_tail immediately.

I suspect, and from that it seems, that it is safe to not explicitly
initalize entry nodes if you know what you're doing / how they're
used.

But whether that is actually intended to work, especially with more
involved debugging (such as LIST_POISON) and invariant checking
(__list_add_valid), I don't know.

I did not find any authoritative documentation that says you have too,
so I guess it's fine. But not ideal.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section
  2025-05-21  3:53     ` Willem de Bruijn
@ 2025-05-21 16:54       ` Stanislav Fomichev
  0 siblings, 0 replies; 5+ messages in thread
From: Stanislav Fomichev @ 2025-05-21 16:54 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Jakub Kicinski, netdev, davem, edumazet, pabeni, horms,
	linux-kernel, syzbot+b191b5ccad8d7a986286

On 05/20, Willem de Bruijn wrote:
> Jakub Kicinski wrote:
> > On Tue, 20 May 2025 22:41:30 -0400 Willem de Bruijn wrote:
> > > > @@ -4277,6 +4280,13 @@ static int packet_notifier(struct notifier_block *this,
> > > >  		}
> > > >  	}
> > > >  	rcu_read_unlock();
> > > > +
> > > > +	/* packet_dev_mc might grab instance locks so can't run under rcu */
> > > > +	list_for_each_entry_safe(ml, tmp, &mclist, remove_list) {
> > > > +		packet_dev_mc(dev, ml, -1);
> > > > +		kfree(ml);
> > > > +	}
> > > > +  
> > > 
> > > Just verifying my understanding of the not entirely obvious locking:
> > > 
> > > po->mclist modifications (add, del, flush, unregister) are all
> > > protected by the RTNL, not the RCU. The RCU only protects the sklist
> > > and by extension the sks on it. So moving the mclist operations out of
> > > the RCU is fine.
> > > 
> > > The delayed operation on the mclist entry is still within the RTNL
> > > from unregister_netdevice_notifier. Which matter as it protects not
> > > only the list, but also the actual operations in packet_dev_mc, such
> > > as inc/dec on dev->promiscuity and associated dev_change_rx_flags.
> > > And new packet_mclist.remove_list too.
> > 
> > Matches my understanding FWIW, but this will be a great addition 
> > to the commit message. Let's add it in v2..
> > 
> > > >  	return NOTIFY_DONE;
> > > >  }
> > > >  
> > > > diff --git a/net/packet/internal.h b/net/packet/internal.h
> > > > index d5d70712007a..1e743d0316fd 100644
> > > > --- a/net/packet/internal.h
> > > > +++ b/net/packet/internal.h
> > > > @@ -11,6 +11,7 @@ struct packet_mclist {
> > > >  	unsigned short		type;
> > > >  	unsigned short		alen;
> > > >  	unsigned char		addr[MAX_ADDR_LEN];
> > > > +	struct list_head	remove_list;  
> > > 
> > > INIT_LIST_HEAD on alloc in packet_mc_add?
> > 
> > Just to be clear this is an "entry node" not a "head node",
> > is it common to init "entry nodes"? 
> 
> I wasn't sure. A small sample from net/core showed that many do, e.g.,
> napi->poll_list. But not all, e.g., failover->list just calls
> list_add_tail immediately.
> 
> I suspect, and from that it seems, that it is safe to not explicitly
> initalize entry nodes if you know what you're doing / how they're
> used.
> 
> But whether that is actually intended to work, especially with more
> involved debugging (such as LIST_POISON) and invariant checking
> (__list_add_valid), I don't know.
> 
> I did not find any authoritative documentation that says you have too,
> so I guess it's fine. But not ideal.

I can add the initialization to be safe and add more info to the commit
message, thank you both for the feedback!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-05-21 16:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-20 20:20 [PATCH net] af_packet: move notifier's packet_dev_mc out of rcu critical section Stanislav Fomichev
2025-05-21  2:41 ` Willem de Bruijn
2025-05-21  3:00   ` Jakub Kicinski
2025-05-21  3:53     ` Willem de Bruijn
2025-05-21 16:54       ` Stanislav Fomichev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).