From: dsahern@kernel.org
To: netdev@vger.kernel.org
Cc: roopa@cumulusnetworks.com, sharpd@cumulusnetworks.com,
idosch@mellanox.com, davem@davemloft.net,
David Ahern <dsahern@gmail.com>
Subject: [PATCH RFC net-next 16/18] net/ipv6: Allow routes to use nexthop objects
Date: Fri, 31 Aug 2018 17:49:51 -0700 [thread overview]
Message-ID: <20180901004954.7145-17-dsahern@kernel.org> (raw)
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>
From: David Ahern <dsahern@gmail.com>
Allow users to specify a nexthop id to use with a route.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip6_fib.h | 4 +++
include/net/nexthop.h | 3 ++
net/ipv4/nexthop.c | 5 +++
net/ipv6/addrconf.c | 3 ++
net/ipv6/ip6_fib.c | 17 ++++++++---
net/ipv6/ndisc.c | 2 ++
net/ipv6/route.c | 85 +++++++++++++++++++++++++++++++++++++++++----------
7 files changed, 98 insertions(+), 21 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1f04a26e4c65..170aadcd83b4 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -52,6 +52,7 @@ struct fib6_config {
u16 fc_type; /* only 8 bits are used */
u16 fc_delete_all_nh : 1,
__unused : 15;
+ u32 fc_nh_id;
struct in6_addr fc_dst;
struct in6_addr fc_src;
@@ -139,6 +140,8 @@ struct fib6_info {
struct fib6_info __rcu *fib6_next;
struct fib6_node __rcu *fib6_node;
+ struct list_head nh_list;
+
/* Multipath routes:
* siblings is a list of fib6_info that have the the same metric/weight,
* destination, but not the same gateway. nsiblings is just a cache
@@ -171,6 +174,7 @@ struct fib6_info {
unused:3;
struct rcu_head rcu;
+ struct nexthop *nh;
struct fib6_nh fib6_nh[0];
};
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index dae1518af3f3..759bb39e4ea7 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -175,6 +175,9 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
static inline struct fib6_nh *fib6_info_nh(struct fib6_info *f6i)
{
+ if (f6i->nh)
+ return nexthop_fib6_nh(f6i->nh);
+
return f6i->fib6_nh;
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index d1fc3d21af86..1e77fa94e562 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -317,6 +317,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
+ struct fib6_info *f6i, *tmp;
struct fib_info *fi;
bool do_flush;
@@ -328,6 +329,10 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
if (do_flush)
fib_flush(net);
+
+ list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
+ ip6_del_rt(net, f6i);
+ }
}
/* called on insert failure too */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index da5102bff2a9..8131cdd472cb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2366,6 +2366,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
+ /* prefix routes do not use nexthop objects */
+ if (rt->nh)
+ continue;
if (rt->fib6_nh->nh_dev->ifindex != dev->ifindex)
continue;
if ((rt->fib6_flags & flags) != flags)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5b0ca5b3710d..b6dc644a55cf 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -202,7 +202,10 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
}
}
- fib6_nh_release(f6i->fib6_nh);
+ if (f6i->nh)
+ nexthop_put(f6i->nh);
+ else
+ fib6_nh_release(f6i->fib6_nh);
m = f6i->fib6_metrics;
if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
@@ -1302,6 +1305,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (!err) {
__fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
+ if (rt->nh)
+ list_add(&rt->nh_list, &rt->nh->f6i_list);
}
out:
@@ -1776,6 +1781,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
+ if (rt->nh)
+ list_del(&rt->nh_list);
+
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
@@ -2251,7 +2259,6 @@ void fib6_gc_cleanup(void)
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
- struct fib6_nh *fib6_nh = rt->fib6_nh;
struct ipv6_route_iter *iter = seq->private;
const struct net_device *dev;
@@ -2262,12 +2269,12 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_flags & RTF_GATEWAY)
- seq_printf(seq, "%pi6", &fib6_nh->nh_gw);
+ if (!rt->nh && rt->fib6_flags & RTF_GATEWAY)
+ seq_printf(seq, "%pi6", &rt->fib6_nh->nh_gw);
else
seq_puts(seq, "00000000000000000000000000000000");
- dev = fib6_nh->nh_dev;
+ dev = rt->nh ? NULL : rt->fib6_nh->nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
rt->fib6_flags, dev ? dev->name : "");
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4bc47b9db35b..1a6b71873dd3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1277,6 +1277,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
+ /* routes added from RAs do not use nexthop objects */
fib6_nh = rt->fib6_nh;
neigh = ip6_neigh_lookup(&fib6_nh->nh_gw, fib6_nh->nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
@@ -1307,6 +1308,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
+ /* routes added from RAs do not use nexthop objects */
fib6_nh = rt->fib6_nh;
neigh = ip6_neigh_lookup(&fib6_nh->nh_gw, fib6_nh->nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2c140ce95eb4..217be2c72b69 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -439,6 +439,11 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
+ if (match->nh) {
+ /* TODO: no hash-based path selection for nexthop objects yet; use match as-is */
+ return match;
+ }
+
if (fl6->mp_hash <= atomic_read(&match->fib6_nh->nh_upper_bound))
return match;
@@ -661,13 +666,15 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
int m;
bool match_do_rr = false;
- if (rt->fib6_nh->nh_flags & RTNH_F_DEAD)
- goto out;
+ if (!rt->nh) {
+ if (rt->fib6_nh->nh_flags & RTNH_F_DEAD)
+ goto out;
- if (fib6_ignore_linkdown(rt) &&
- rt->fib6_nh->nh_flags & RTNH_F_LINKDOWN &&
- !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
- goto out;
+ if (fib6_ignore_linkdown(rt) &&
+ rt->fib6_nh->nh_flags & RTNH_F_LINKDOWN &&
+ !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
+ goto out;
+ }
if (fib6_check_expired(rt))
goto out;
@@ -3064,6 +3071,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct fib6_info *rt = NULL;
+ struct nexthop *nh = NULL;
struct fib6_table *table;
int err = -EINVAL;
@@ -3099,6 +3107,15 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
#endif
+ if (cfg->fc_nh_id) {
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop id - nexthop does not exist");
+ goto out;
+ }
+ }
+
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
@@ -3118,7 +3135,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags, true);
+ rt = fib6_info_alloc(gfp_flags, !nh);
if (!rt)
goto out;
@@ -3152,9 +3169,16 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- err = fib6_nh_init(net, rt->fib6_nh, cfg, extack);
- if (err)
- goto out;
+ if (nh) {
+ nexthop_get(nh);
+ rt->nh = nh;
+ if (nexthop_has_gw(nh))
+ cfg->fc_flags |= RTF_GATEWAY;
+ } else {
+ err = fib6_nh_init(net, rt->fib6_nh, cfg, extack);
+ if (err)
+ goto out;
+ }
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
struct net_device *dev = fib6_info_nh_dev(rt);
@@ -3327,6 +3351,9 @@ static int ip6_route_del(struct fib6_config *cfg,
}
continue;
}
+ if (rt->nh && rt->nh->id == cfg->fc_nh_id)
+ goto del_rt;
+
if (cfg->fc_ifindex &&
(!rt->fib6_nh->nh_dev ||
rt->fib6_nh->nh_dev->ifindex != cfg->fc_ifindex))
@@ -3340,6 +3367,7 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (!fib6_info_hold_safe(rt))
continue;
+del_rt:
rcu_read_unlock();
/* if gateway was specified only delete the one hop */
@@ -3482,6 +3510,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
#ifdef CONFIG_IPV6_ROUTE_INFO
+/* RA routes do not use nexthop objects */
static struct fib6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
const struct in6_addr *gwaddr,
@@ -3551,6 +3580,7 @@ static struct fib6_info *rt6_add_route_info(struct net *net,
}
#endif
+/* RA routes do not use nexthop objects */
struct fib6_info *rt6_get_dflt_router(struct net *net,
const struct in6_addr *addr,
struct net_device *dev)
@@ -3892,6 +3922,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
return NULL;
}
+/* not called when rt->nh is set */
static bool rt6_is_dead(const struct fib6_info *rt)
{
if (rt->fib6_nh->nh_flags & RTNH_F_DEAD ||
@@ -3970,7 +4001,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry &&
+ if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
rt->fib6_nh->nh_dev == arg->dev) {
rt->fib6_nh->nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
@@ -4179,6 +4210,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4224,6 +4256,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
+ if (tb[RTA_NH_ID])
+ cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
+
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -4421,6 +4456,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
+ nla = nla_find(attrs, attrlen, RTA_NH_ID);
+ if (nla) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "Multipath API can not use nexthop objects.");
+ goto cleanup;
+ }
}
r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
@@ -4596,6 +4638,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
static size_t rt6_nlmsg_size(struct fib6_info *rt)
{
int nexthop_len = 0;
+ size_t nh_len;
if (rt->fib6_nsiblings) {
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
@@ -4606,23 +4649,29 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
nexthop_len *= rt->fib6_nsiblings;
}
+ if (rt->nh) {
+ nh_len = nla_total_size(4); /* RTA_NH_ID */
+ } else {
+ nh_len = lwtunnel_get_encap_size(rt->fib6_nh->nh_lwtstate)
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + nla_total_size(4); /* RTA_OIF */
+ }
+
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
+ nla_total_size(16) /* RTA_DST */
- + nla_total_size(16) /* RTA_GATEWAY */
+ nla_total_size(16) /* RTA_PREFSRC */
+ nla_total_size(4) /* RTA_TABLE */
+ nla_total_size(4) /* RTA_IIF */
- + nla_total_size(4) /* RTA_OIF */
+ nla_total_size(4) /* RTA_PRIORITY */
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh->nh_lwtstate)
- + nexthop_len;
+ + nexthop_len + nh_len;
}
+/* not called when rt->nh is set */
static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
unsigned int *flags, bool skip_oif)
{
@@ -4777,10 +4826,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
goto nla_put_failure;
+ if (rt->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
+ goto nla_put_failure;
+
/* For multipath routes, walk the siblings list and add
* each as a nexthop within RTA_MULTIPATH.
*/
- if (rt->fib6_nsiblings) {
+ } else if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
struct nlattr *mp;
--
2.11.0
next prev parent reply other threads:[~2018-09-01 5:20 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-01 0:49 [PATCH RFC net-next 00/18] net: Improve route scalability via support for nexthop objects dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 01/18] net: Rename net/nexthop.h net/rtnh.h dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 02/18] net: ipv4: export fib_good_nh and fib_flush dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 03/18] net/ipv4: export fib_info_update_nh_saddr dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 04/18] net/ipv4: export fib_check_nh dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 05/18] net/ipv4: Define fib_get_nhs when CONFIG_IP_ROUTE_MULTIPATH is disabled dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 06/18] net/ipv4: Create init and release helpers for fib_nh dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 07/18] net: ipv4: Add fib_nh to fib_result dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 08/18] net/ipv4: Move device validation to helper dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 09/18] net/ipv6: Create init and release helpers for fib6_nh dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 10/18] net/ipv6: Make fib6_nh optional at the end of fib6_info dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 11/18] net: Initial nexthop code dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 12/18] net/ipv4: Add nexthop helpers for ipv4 integration dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 13/18] net/ipv4: Convert existing use of fib_info to new helpers dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 14/18] net/ipv4: Allow routes to use nexthop objects dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 15/18] net/ipv6: Use helpers to access fib6_nh data dsahern
2018-09-01 0:49 ` dsahern [this message]
2018-09-01 0:49 ` [PATCH RFC net-next 17/18] net: Add support for nexthop groups dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 18/18] net/ipv4: Optimization for fib_info lookup dsahern
2018-09-01 20:43 ` Stephen Hemminger
2018-09-04 15:27 ` David Ahern
2018-09-01 0:49 ` [PATCH iproute2-next] ip: Add support for nexthop objects dsahern
2018-09-01 20:37 ` Stephen Hemminger
2018-09-04 15:30 ` David Ahern
2018-09-02 17:34 ` [PATCH RFC net-next 00/18] net: Improve route scalability via " David Miller
2018-09-04 15:57 ` David Ahern
2018-12-11 12:52 ` Jan Maria Matejka
2018-12-12 20:27 ` David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180901004954.7145-17-dsahern@kernel.org \
--to=dsahern@kernel.org \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=idosch@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=roopa@cumulusnetworks.com \
--cc=sharpd@cumulusnetworks.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.