netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
	David Ahern <dsahern@kernel.org>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>,
	Kuniyuki Iwashima <kuniyu@amazon.com>,
	Kuniyuki Iwashima <kuni1840@gmail.com>, <netdev@vger.kernel.org>
Subject: [PATCH v3 net-next 11/15] ipv6: Factorise ip6_route_multipath_add().
Date: Thu, 17 Apr 2025 17:03:52 -0700	[thread overview]
Message-ID: <20250418000443.43734-12-kuniyu@amazon.com> (raw)
In-Reply-To: <20250418000443.43734-1-kuniyu@amazon.com>

We will get rid of RTNL from RTM_NEWROUTE and SIOCADDRT and rely
on RCU to guarantee dev and nexthop lifetime.

Then, the RCU section will start before ip6_route_info_create_nh()
in ip6_route_multipath_add(), but ip6_route_info_create() is called
in the same loop and may sleep.

Let's split the loop into ip6_route_mpath_info_create() and
ip6_route_mpath_info_create_nh().

Note that ip6_route_info_append() is now integrated into
ip6_route_mpath_info_create_nh() because we need to call different
free functions for nexthops that passed ip6_route_info_create_nh().

In case of failure, the remaining nexthops for which
ip6_route_info_create_nh() has not yet been called will be freed by
ip6_route_mpath_info_cleanup().

OTOH, if a nexthop passes ip6_route_info_create_nh(), it will be linked
to a local temporary list, which will be spliced back to rt6_nh_list.
In case of failure, these nexthops will be released by fib6_info_release()
in ip6_route_multipath_add().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 net/ipv6/route.c | 205 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 130 insertions(+), 75 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05e33d319488..c8c1c75268e3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5315,29 +5315,131 @@ struct rt6_nh {
 	struct fib6_info *fib6_info;
 	struct fib6_config r_cfg;
 	struct list_head list;
+	int weight;
 };
 
-static int ip6_route_info_append(struct list_head *rt6_nh_list,
-				 struct fib6_info *rt,
-				 struct fib6_config *r_cfg)
+static void ip6_route_mpath_info_cleanup(struct list_head *rt6_nh_list)
 {
-	struct rt6_nh *nh;
-	int err = -EEXIST;
+	struct rt6_nh *nh, *nh_next;
 
-	list_for_each_entry(nh, rt6_nh_list, list) {
-		/* check if fib6_info already exists */
-		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
-			return err;
+	list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) {
+		struct fib6_info *rt = nh->fib6_info;
+
+		if (rt) {
+			free_percpu(rt->fib6_nh->nh_common.nhc_pcpu_rth_output);
+			free_percpu(rt->fib6_nh->rt6i_pcpu);
+			ip_fib_metrics_put(rt->fib6_metrics);
+			kfree(rt);
+		}
+
+		list_del(&nh->list);
+		kfree(nh);
 	}
+}
 
-	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
-	if (!nh)
-		return -ENOMEM;
-	nh->fib6_info = rt;
-	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
-	list_add_tail(&nh->list, rt6_nh_list);
+static int ip6_route_mpath_info_create(struct list_head *rt6_nh_list,
+				       struct fib6_config *cfg,
+				       struct netlink_ext_ack *extack)
+{
+	struct rtnexthop *rtnh;
+	int remaining;
+	int err;
+
+	remaining = cfg->fc_mp_len;
+	rtnh = (struct rtnexthop *)cfg->fc_mp;
+
+	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
+	 * fib6_info structs per nexthop
+	 */
+	while (rtnh_ok(rtnh, remaining)) {
+		struct fib6_config r_cfg;
+		struct fib6_info *rt;
+		struct rt6_nh *nh;
+		int attrlen;
+
+		nh = kzalloc(sizeof(*nh), GFP_KERNEL);
+		if (!nh) {
+			err = -ENOMEM;
+			goto err;
+		}
+
+		list_add_tail(&nh->list, rt6_nh_list);
+
+		memcpy(&r_cfg, cfg, sizeof(*cfg));
+		if (rtnh->rtnh_ifindex)
+			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla) {
+				r_cfg.fc_gateway = nla_get_in6_addr(nla);
+				r_cfg.fc_flags |= RTF_GATEWAY;
+			}
+
+			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+			if (nla)
+				r_cfg.fc_encap_type = nla_get_u16(nla);
+		}
+
+		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
+
+		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto err;
+		}
+
+		nh->fib6_info = rt;
+		nh->weight = rtnh->rtnh_hops + 1;
+		memcpy(&nh->r_cfg, &r_cfg, sizeof(r_cfg));
+
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
 
 	return 0;
+err:
+	ip6_route_mpath_info_cleanup(rt6_nh_list);
+	return err;
+}
+
+static int ip6_route_mpath_info_create_nh(struct list_head *rt6_nh_list,
+					  struct netlink_ext_ack *extack)
+{
+	struct rt6_nh *nh, *nh_next, *nh_tmp;
+	LIST_HEAD(tmp);
+	int err;
+
+	list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) {
+		struct fib6_info *rt = nh->fib6_info;
+
+		err = ip6_route_info_create_nh(rt, &nh->r_cfg, extack);
+		if (err) {
+			nh->fib6_info = NULL;
+			goto err;
+		}
+
+		rt->fib6_nh->fib_nh_weight = nh->weight;
+
+		list_move_tail(&nh->list, &tmp);
+
+		list_for_each_entry(nh_tmp, rt6_nh_list, list) {
+			/* check if fib6_info already exists */
+			if (rt6_duplicate_nexthop(nh_tmp->fib6_info, rt)) {
+				err = -EEXIST;
+				goto err;
+			}
+		}
+	}
+out:
+	list_splice(&tmp, rt6_nh_list);
+	return err;
+err:
+	ip6_route_mpath_info_cleanup(rt6_nh_list);
+	goto out;
 }
 
 static void ip6_route_mpath_notify(struct fib6_info *rt,
@@ -5396,75 +5498,28 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 {
 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
 	struct nl_info *info = &cfg->fc_nlinfo;
-	struct fib6_config r_cfg;
-	struct rtnexthop *rtnh;
-	struct fib6_info *rt;
-	struct rt6_nh *err_nh;
 	struct rt6_nh *nh, *nh_safe;
+	LIST_HEAD(rt6_nh_list);
+	struct rt6_nh *err_nh;
 	__u16 nlflags;
-	int remaining;
-	int attrlen;
-	int err = 1;
 	int nhn = 0;
-	int replace = (cfg->fc_nlinfo.nlh &&
-		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
-	LIST_HEAD(rt6_nh_list);
+	int replace;
+	int err;
+
+	replace = (cfg->fc_nlinfo.nlh &&
+		   (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
 
 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
 		nlflags |= NLM_F_APPEND;
 
-	remaining = cfg->fc_mp_len;
-	rtnh = (struct rtnexthop *)cfg->fc_mp;
-
-	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
-	 * fib6_info structs per nexthop
-	 */
-	while (rtnh_ok(rtnh, remaining)) {
-		memcpy(&r_cfg, cfg, sizeof(*cfg));
-		if (rtnh->rtnh_ifindex)
-			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
-
-		attrlen = rtnh_attrlen(rtnh);
-		if (attrlen > 0) {
-			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
-
-			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			if (nla) {
-				r_cfg.fc_gateway = nla_get_in6_addr(nla);
-				r_cfg.fc_flags |= RTF_GATEWAY;
-			}
-
-			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
-			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
-			if (nla)
-				r_cfg.fc_encap_type = nla_get_u16(nla);
-		}
-
-		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
-		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
-		if (IS_ERR(rt)) {
-			err = PTR_ERR(rt);
-			rt = NULL;
-			goto cleanup;
-		}
-
-		err = ip6_route_info_create_nh(rt, &r_cfg, extack);
-		if (err) {
-			rt = NULL;
-			goto cleanup;
-		}
-
-		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
-
-		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
-		if (err) {
-			fib6_info_release(rt);
-			goto cleanup;
-		}
+	err = ip6_route_mpath_info_create(&rt6_nh_list, cfg, extack);
+	if (err)
+		return err;
 
-		rtnh = rtnh_next(rtnh, &remaining);
-	}
+	err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack);
+	if (err)
+		goto cleanup;
 
 	/* for add and replace send one notification with all nexthops.
 	 * Skip the notification in fib6_add_rt2node and send one with
-- 
2.49.0


  parent reply	other threads:[~2025-04-18  0:09 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-18  0:03 [PATCH v3 net-next 00/15] ipv6: No RTNL for IPv6 routing table Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 01/15] ipv6: Validate RTA_GATEWAY of RTA_MULTIPATH in rtm_to_fib6_config() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 02/15] ipv6: Get rid of RTNL for SIOCDELRT and RTM_DELROUTE Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 03/15] ipv6: Move some validation from ip6_route_info_create() to rtm_to_fib6_config() Kuniyuki Iwashima
2025-04-29  0:24   ` Lai, Yi
2025-04-29  1:20     ` Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 04/15] ipv6: Check GATEWAY in rtm_to_fib6_multipath_config() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 05/15] ipv6: Move nexthop_find_by_id() after fib6_info_alloc() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 06/15] ipv6: Split ip6_route_info_create() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 07/15] ipv6: Preallocate rt->fib6_nh->rt6i_pcpu in ip6_route_info_create() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 08/15] ipv6: Preallocate nhc_pcpu_rth_output " Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 09/15] ipv6: Don't pass net to ip6_route_info_append() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 10/15] ipv6: Rename rt6_nh.next to rt6_nh.list Kuniyuki Iwashima
2025-04-18  0:03 ` Kuniyuki Iwashima [this message]
2025-04-18  0:03 ` [PATCH v3 net-next 12/15] ipv6: Protect fib6_link_table() with spinlock Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 13/15] ipv6: Defer fib6_purge_rt() in fib6_add_rt2node() to fib6_add() Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 14/15] ipv6: Protect nh->f6i_list with spinlock and flag Kuniyuki Iwashima
2025-04-18  0:03 ` [PATCH v3 net-next 15/15] ipv6: Get rid of RTNL for SIOCADDRT and RTM_NEWROUTE Kuniyuki Iwashima
2025-05-04  9:16   ` Eric Dumazet
2025-05-04 17:20     ` Kuniyuki Iwashima
2025-05-04 19:34       ` Eric Dumazet
2025-05-04 20:11         ` Kuniyuki Iwashima
2025-04-24  7:50 ` [PATCH v3 net-next 00/15] ipv6: No RTNL for IPv6 routing table patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250418000443.43734-12-kuniyu@amazon.com \
    --to=kuniyu@amazon.com \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=kuni1840@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).