Netdev List
 help / color / mirror / Atom feed
From: Kuniyuki Iwashima <kuniyu@google.com>
To: "David S . Miller" <davem@davemloft.net>,
	David Ahern <dsahern@kernel.org>,
	 Eric Dumazet <edumazet@google.com>,
	Ido Schimmel <idosch@nvidia.com>,
	 Jakub Kicinski <kuba@kernel.org>,
	Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>,
	Kuniyuki Iwashima <kuniyu@google.com>,
	 Kuniyuki Iwashima <kuni1840@gmail.com>,
	netdev@vger.kernel.org
Subject: [PATCH v3 net-next 08/15] ip6mr: Free mr_table after RCU grace period.
Date: Thu,  4 Jun 2026 22:46:26 +0000	[thread overview]
Message-ID: <20260604224712.3209821-9-kuniyu@google.com> (raw)
In-Reply-To: <20260604224712.3209821-1-kuniyu@google.com>

Since default_device_exit_batch() is called after ->exit_rtnl(),
idev->mc_ifc_work could end up calling mroute6_is_socket() under RCU
while ->exit_rtnl() is running. [0]

With CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=n, ip6mr_fib_lookup() does
not check if net->ipv6.mrt6 is NULL.  If ip6mr_net_exit_batch() has
set net->ipv6.mrt6 to NULL and freed the table, the mrt->mroute_sk
access could result in null-ptr-deref or use-after-free.

Let's prepare for that situation by applying RCU rules to the ip6mr
table similarly.

!check_net(net) is added in ip6mr_cache_unresolved() and
mroute_clean_tables() to synchronise the two under mfc_unres_lock
so that ip6mr_cache_unresolved() will not queue an skb after
mroute_clean_tables() has purged &mrt->mfc_unres_queue.

rcu_read_lock() in reg_vif_xmit() is moved up to cover
ip6mr_fib_lookup() as with ipmr.

Link: https://lore.kernel.org/netdev/20260407184202.34cfe2d6@kernel.org/ #[0]
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
---
 net/ipv6/ip6mr.c | 121 +++++++++++++++++++++++++++--------------------
 1 file changed, 69 insertions(+), 52 deletions(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8c8ad1753c75..ddbe06397d9c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -136,16 +136,6 @@ static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
 	return NULL;
 }
 
-static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
-{
-	struct mr_table *mrt;
-
-	rcu_read_lock();
-	mrt = __ip6mr_get_table(net, id);
-	rcu_read_unlock();
-	return mrt;
-}
-
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 			    struct mr_table **mrt)
 {
@@ -274,7 +264,7 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 
 	ASSERT_RTNL();
 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
-		list_del(&mrt->list);
+		list_del_rcu(&mrt->list);
 		ip6mr_free_table(mrt);
 	}
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
@@ -298,28 +288,30 @@ bool ip6mr_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL(ip6mr_rule_default);
 #else
-#define ip6mr_for_each_table(mrt, net) \
-	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-
 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 					    struct mr_table *mrt)
 {
 	if (!mrt)
-		return net->ipv6.mrt6;
+		return rcu_dereference(net->ipv6.mrt6);
 	return NULL;
 }
 
-static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
 {
-	return net->ipv6.mrt6;
+	return rcu_dereference_check(net->ipv6.mrt6,
+				     lockdep_rtnl_is_held() ||
+				     !rcu_access_pointer(net->ipv6.mrt6));
 }
 
-#define __ip6mr_get_table ip6mr_get_table
+#define ip6mr_for_each_table(mrt, net)				\
+	for (mrt = __ip6mr_get_table(net, 0); mrt; mrt = NULL)
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 			    struct mr_table **mrt)
 {
-	*mrt = net->ipv6.mrt6;
+	*mrt = rcu_dereference(net->ipv6.mrt6);
+	if (!*mrt)
+		return -EAGAIN;
 	return 0;
 }
 
@@ -330,15 +322,19 @@ static int __net_init ip6mr_rules_init(struct net *net)
 	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 	if (IS_ERR(mrt))
 		return PTR_ERR(mrt);
-	net->ipv6.mrt6 = mrt;
+
+	rcu_assign_pointer(net->ipv6.mrt6, mrt);
 	return 0;
 }
 
 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
+	struct mr_table *mrt = rcu_dereference_protected(net->ipv6.mrt6, 1);
+
 	ASSERT_RTNL();
-	ip6mr_free_table(net->ipv6.mrt6);
-	net->ipv6.mrt6 = NULL;
+
+	RCU_INIT_POINTER(net->ipv6.mrt6, NULL);
+	ip6mr_free_table(mrt);
 }
 
 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -353,6 +349,17 @@ static unsigned int ip6mr_rules_seq_read(const struct net *net)
 }
 #endif
 
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	struct mr_table *mrt;
+
+	rcu_read_lock();
+	mrt = __ip6mr_get_table(net, id);
+	rcu_read_unlock();
+
+	return mrt;
+}
+
 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 			  const void *ptr)
 {
@@ -411,8 +418,8 @@ static void ip6mr_free_table(struct mr_table *mrt)
 	timer_shutdown_sync(&mrt->ipmr_expire_timer);
 	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
 				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
-	rhltable_destroy(&mrt->mfc_hash);
-	kfree(mrt);
+
+	mr_table_free(mrt);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -623,18 +630,22 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 	if (!pskb_inet_may_pull(skb))
 		goto tx_err;
 
+	rcu_read_lock();
+
 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
-		goto tx_err;
+		goto tx_lookup_err;
 
 	DEV_STATS_ADD(dev, tx_bytes, skb->len);
 	DEV_STATS_INC(dev, tx_packets);
-	rcu_read_lock();
+
 	ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
 			   MRT6MSG_WHOLEPKT);
 	rcu_read_unlock();
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
 
+tx_lookup_err:
+	rcu_read_unlock();
 tx_err:
 	DEV_STATS_INC(dev, tx_errors);
 	kfree_skb(skb);
@@ -1157,11 +1168,18 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 				  struct sk_buff *skb, struct net_device *dev)
 {
-	struct mfc6_cache *c;
+	struct net *net = read_pnet(&mrt->net);
+	struct mfc6_cache *c = NULL;
 	bool found = false;
 	int err;
 
 	spin_lock_bh(&mfc_unres_lock);
+
+	if (!check_net(net)) {
+		err = -EINVAL;
+		goto err;
+	}
+
 	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
@@ -1177,10 +1195,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 
 		c = ip6mr_cache_alloc_unres();
 		if (!c) {
-			spin_unlock_bh(&mfc_unres_lock);
-
-			kfree_skb(skb);
-			return -ENOBUFS;
+			err = -ENOBUFS;
+			goto err;
 		}
 
 		/* Fill in the new cache entry */
@@ -1192,16 +1208,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 		 *	Reflect first query at pim6sd
 		 */
 		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
-		if (err < 0) {
-			/* If the report failed throw the cache entry
-			   out - Brad Parker
-			 */
-			spin_unlock_bh(&mfc_unres_lock);
-
-			ip6mr_cache_free(c);
-			kfree_skb(skb);
-			return err;
-		}
+		if (err < 0)
+			goto err;
 
 		atomic_inc(&mrt->cache_resolve_queue_len);
 		list_add(&c->_c.list, &mrt->mfc_unres_queue);
@@ -1212,18 +1220,26 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 
 	/* See if we can append the packet */
 	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
-		kfree_skb(skb);
+		c = NULL;
 		err = -ENOBUFS;
-	} else {
-		if (dev) {
-			skb->dev = dev;
-			skb->skb_iif = dev->ifindex;
-		}
-		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
-		err = 0;
+		goto err;
+	}
+
+	if (dev) {
+		skb->dev = dev;
+		skb->skb_iif = dev->ifindex;
 	}
 
+	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
+
+	spin_unlock_bh(&mfc_unres_lock);
+	return 0;
+
+err:
 	spin_unlock_bh(&mfc_unres_lock);
+	if (c)
+		ip6mr_cache_free(c);
+	kfree_skb(skb);
 	return err;
 }
 
@@ -1534,6 +1550,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 
 static void mroute_clean_tables(struct mr_table *mrt, int flags)
 {
+	struct net *net = read_pnet(&mrt->net);
 	struct mr_mfc *c, *tmp;
 	LIST_HEAD(list);
 	int i;
@@ -1558,8 +1575,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 				continue;
 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
 			list_del_rcu(&c->list);
-			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
-						       FIB_EVENT_ENTRY_DEL,
+			call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
 						       (struct mfc6_cache *)c, mrt->id);
 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 			mr_cache_put(c);
@@ -1567,7 +1583,8 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 	}
 
 	if (flags & MRT6_FLUSH_MFC) {
-		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+		if (atomic_read(&mrt->cache_resolve_queue_len) != 0 ||
+		    !check_net(net)) {
 			spin_lock_bh(&mfc_unres_lock);
 			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 				list_del(&c->list);
-- 
2.54.0.1032.g2f8565e1d1-goog


  parent reply	other threads:[~2026-06-04 22:47 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 22:46 [PATCH v3 net-next 00/15] ip6mr: No RTNL for RTNL_FAMILY_IP6MR rtnetlink Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 01/15] selftest: net: Extend ipmr.c for IP6MR Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 02/15] ip6mr: Annotate access to mrt->mroute_do_{pim,assert,wrvifwhole} Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 03/15] ip6mr: Use MAXMIFS in mr6_msgsize() Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 04/15] ip6mr: Allocate skb earlier in ip6mr_rtm_getroute() Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 05/15] ip6mr: Convert ip6mr_rtm_getroute() to RCU Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 06/15] ip6mr: Convert ip6mr_rtm_dumproute() " Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 07/15] net: Remove rtnl_held of struct fib_dump_filter Kuniyuki Iwashima
2026-06-04 22:46 ` Kuniyuki Iwashima [this message]
2026-06-04 22:46 ` [PATCH v3 net-next 09/15] ip6mr: Call fib_rules_unregister() without RTNL Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 10/15] ip6mr: Move unregister_netdevice_many() out of mroute_clean_tables() Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 11/15] ip6mr: Move unregister_netdevice_many() out of ip6mr_free_table() Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 12/15] ip6mr: Convert ip6mr_net_exit_batch() to ->exit_rtnl() Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 13/15] ip6mr: Remove RTNL in ip6mr_rules_init() and ip6mr_net_init() Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 14/15] ip6mr: Replace RTNL with a dedicated mutex for MFC Kuniyuki Iwashima
2026-06-04 22:46 ` [PATCH v3 net-next 15/15] ip6mr: Define net->ipv6.{ip6mr_notifier_ops,ipmr_seq} under CONFIG_IPV6_MROUTE Kuniyuki Iwashima
2026-06-09  0:20 ` [PATCH v3 net-next 00/15] ip6mr: No RTNL for RTNL_FAMILY_IP6MR rtnetlink patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260604224712.3209821-9-kuniyu@google.com \
    --to=kuniyu@google.com \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=idosch@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=kuni1840@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox