From: dsahern@kernel.org
To: netdev@vger.kernel.org
Cc: roopa@cumulusnetworks.com, sharpd@cumulusnetworks.com,
idosch@mellanox.com, davem@davemloft.net,
David Ahern <dsahern@gmail.com>
Subject: [PATCH RFC net-next 14/18] net/ipv4: Allow routes to use nexthop objects
Date: Fri, 31 Aug 2018 17:49:49 -0700 [thread overview]
Message-ID: <20180901004954.7145-15-dsahern@kernel.org> (raw)
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>
From: David Ahern <dsahern@gmail.com>
Add new RTA attribute to allow a user to specify a nexthop id to use
with a route instead of the current nexthop specification.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip_fib.h | 1 +
include/uapi/linux/rtnetlink.h | 1 +
net/ipv4/fib_frontend.c | 7 +++
net/ipv4/fib_semantics.c | 139 ++++++++++++++++++++++++++++++-----------
net/ipv4/fib_trie.c | 33 +++++++---
5 files changed, 136 insertions(+), 45 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c59e0f1ba59b..d2f961de732d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -40,6 +40,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32 fc_prefsrc;
+ u32 fc_nh_id;
struct nlattr *fc_mx;
struct rtnexthop *fc_mp;
int fc_mx_len;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 4a0615797e5e..a036368798a9 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -349,6 +349,7 @@ enum rtattr_type_t {
RTA_IP_PROTO,
RTA_SPORT,
RTA_DPORT,
+ RTA_NH_ID,
__RTA_MAX
};
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c483453bf037..cf133d4e02f2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -322,6 +322,9 @@ static bool fib_info_nh_uses_dev(struct fib_info *fi,
bool dev_match = false;
int ret;
+ if (fi->nh)
+ return nexthop_uses_dev(fi->nh, dev);
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
for (ret = 0; ret < fi->fib_nhs; ret++) {
struct fib_nh *nh = &fi->fib_nh[ret];
@@ -663,6 +666,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -746,6 +750,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
if (err < 0)
goto errout;
break;
+ case RTA_NH_ID:
+ cfg->fc_nh_id = nla_get_u32(attr);
+ break;
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0cd536ad1761..c91cdafd40ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -226,9 +226,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
struct fib_info *fi = container_of(head, struct fib_info, rcu);
struct dst_metrics *m;
- change_nexthops(fi) {
- fib_nh_release(fi->fib_net, nexthop_nh);
- } endfor_nexthops(fi);
+ if (fi->nh) {
+ nexthop_put(fi->nh);
+ } else {
+ change_nexthops(fi) {
+ fib_nh_release(fi->fib_net, nexthop_nh);
+ } endfor_nexthops(fi);
+ }
m = fi->fib_metrics;
if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
@@ -260,11 +264,15 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
- change_nexthops(fi) {
- if (!nexthop_nh->nh_dev)
- continue;
- hlist_del(&nexthop_nh->nh_hash);
- } endfor_nexthops(fi)
+ if (fi->nh) {
+ list_del(&fi->nh_list);
+ } else {
+ change_nexthops(fi) {
+ if (!nexthop_nh->nh_dev)
+ continue;
+ hlist_del(&nexthop_nh->nh_hash);
+ } endfor_nexthops(fi)
+ }
fi->fib_dead = 1;
fib_info_put(fi);
}
@@ -275,6 +283,12 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
const struct fib_nh *onh = ofi->fib_nh;
+ if (fi->nh || ofi->nh)
+ return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+ if (ofi->fib_nhs == 0)
+ return 0;
+
for_nexthops(fi) {
if (nh->nh_oif != onh->nh_oif ||
nh->nh_gw != onh->nh_gw ||
@@ -310,10 +324,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
val ^= (fi->fib_protocol << 8) | fi->fib_scope;
val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
- for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->nh_oif);
- } endfor_nexthops(fi)
-
+ if (fi->nh) {
+ val ^= fib_devindex_hashfn(fi->nh->id);
+ } else {
+ for_nexthops(fi) {
+ val ^= fib_devindex_hashfn(nh->nh_oif);
+ } endfor_nexthops(fi)
+ }
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
@@ -339,7 +356,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
- (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+ (nh_comp(fi, nfi) == 0))
return fi;
}
@@ -349,6 +366,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
/* Check, that the gateway is already configured.
* Used only by redirect accept routine.
*/
+//TO-DO: need a nexthop version
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
@@ -381,16 +399,19 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
+ nla_total_size(4) /* RTA_PRIORITY */
+ nla_total_size(4) /* RTA_PREFSRC */
+ nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+ size_t nhsize = 0;
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
- if (fi->fib_nhs) {
+ if (fi->nh) {
+ nhsize = nla_total_size(4); /* RTA_NH_ID */
+ } else if (fi->fib_nhs) {
size_t nh_encapsize = 0;
/* Also handles the special case fib_nhs == 1 */
/* each nexthop is packed in an attribute */
- size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+ nhsize = nla_total_size(sizeof(struct rtnexthop));
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
@@ -539,6 +560,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
return nhs;
}
+/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg,
struct netlink_ext_ack *extack)
@@ -625,6 +647,8 @@ static void fib_rebalance(struct fib_info *fi)
int w;
struct in_device *in_dev;
+ WARN_ON(fi->nh);
+
if (fi->fib_nhs < 2)
return;
@@ -712,6 +736,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
+ if (fi->nh)
+ return cfg->fc_nh_id == fi->nh->id ? 0 : 1;
+
if (cfg->fc_oif || cfg->fc_gw) {
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
@@ -1099,9 +1126,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
{
int err;
struct fib_info *fi = NULL;
+ struct nexthop *nh = NULL;
struct fib_info *ofi;
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
+ unsigned char scope;
if (cfg->fc_type > RTN_MAX)
goto err_inval;
@@ -1118,6 +1147,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
+ if (cfg->fc_nh_id) {
+ if (cfg->fc_oif || cfg->fc_gw || cfg->fc_encap || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ goto err_inval;
+ }
+
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop id - nexthop does not exist");
+ goto err_inval;
+ }
+ }
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
@@ -1180,7 +1224,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (err)
goto failure;
- if (cfg->fc_mp) {
+ if (nh) {
+ nexthop_get(nh);
+ fi->nh = nh;
+ } else if (cfg->fc_mp) {
err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
if (err != 0)
goto failure;
@@ -1214,7 +1261,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- if (cfg->fc_scope == RT_SCOPE_HOST) {
+ if (fi->nh) {
+ err = fib_check_nexthop(fi, cfg, extack);
+ if (err)
+ goto failure;
+ } else if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -1254,12 +1305,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- change_nexthops(fi) {
- fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
- } endfor_nexthops(fi)
-
- fib_rebalance(fi);
+ if (!fi->nh) {
+ scope = fi->fib_scope;
+ change_nexthops(fi) {
+ fib_info_update_nh_saddr(net, nexthop_nh, scope);
+ } endfor_nexthops(fi)
+ fib_rebalance(fi);
+ }
link_it:
ofi = fib_find_info(fi);
if (ofi) {
@@ -1280,16 +1333,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
hlist_add_head(&fi->fib_lhash, head);
}
- change_nexthops(fi) {
- struct hlist_head *head;
- unsigned int hash;
+ if (fi->nh) {
+ list_add(&fi->nh_list, &nh->fi_list);
+ } else {
+ change_nexthops(fi) {
+ struct hlist_head *head;
+ unsigned int hash;
- if (!nexthop_nh->nh_dev)
- continue;
- hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_add_head(&nexthop_nh->nh_hash, head);
- } endfor_nexthops(fi)
+ if (!nexthop_nh->nh_dev)
+ continue;
+ hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
+ head = &fib_info_devhash[hash];
+ hlist_add_head(&nexthop_nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ }
spin_unlock_bh(&fib_info_lock);
return fi;
@@ -1298,6 +1355,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
failure:
if (fi) {
+ if (fi->nh)
+ nexthop_put(fi->nh);
+
fi->fib_dead = 1;
free_fib_info(fi);
}
@@ -1344,7 +1404,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
- if (fi->fib_nhs == 1) {
+
+ if (fi->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+ goto nla_put_failure;
+ } else if (fi->fib_nhs == 1) {
if (fi->fib_nh->nh_gw &&
nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
goto nla_put_failure;
@@ -1587,8 +1651,11 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
continue;
break;
}
- if (next_fi->fib_flags & RTNH_F_DEAD)
+
+ fnh = fib_info_nh(next_fi, 0);
+ if (fnh->nh_flags & RTNH_F_DEAD)
continue;
+
last_tos = fa->fa_tos;
last_prio = next_fi->fib_priority;
@@ -1596,7 +1663,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
fa->fa_type != RTN_UNICAST)
continue;
- fnh = fib_info_nh(next_fi, 0);
if (!fnh->nh_gw || fnh->nh_scope != RT_SCOPE_LINK)
continue;
@@ -1749,13 +1815,14 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
+ int h;
+
if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(net, fl4, skb, NULL);
-
+ h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c6aab049a4ac..575bb34d895f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1445,7 +1445,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
/* Step 3: Process the leaf, if that fails fall back to backtracing */
hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
struct fib_info *fi = fa->fa_info;
- int nhsel, err;
+ int nhsel, err, nhmax;
if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
if (index >= (1ul << fa->fa_slen))
@@ -1460,6 +1460,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (unlikely(err < 0)) {
+out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
@@ -1468,17 +1469,31 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
- struct fib_nh *nh = &fi->fib_nh[nhsel];
- struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
+
+ if (fi->nh) {
+ if (nexthop_is_blackhole(fi->nh)) {
+ err = fib_props[RTN_BLACKHOLE].error;
+ goto out_reject;
+ }
+ nhmax = nexthop_num_path(fi->nh);
+ } else {
+ nhmax = fi->fib_nhs;
+ }
+ for (nhsel = 0; nhsel < nhmax; nhsel++) {
+ struct fib_nh *nh = fib_info_nh(fi, nhsel);
+ struct in_device *in_dev;
if (nh->nh_flags & RTNH_F_DEAD)
continue;
- if (in_dev &&
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
- nh->nh_flags & RTNH_F_LINKDOWN &&
- !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
- continue;
+
+ if (!fi->nh) {
+ in_dev = __in_dev_get_rcu(nh->nh_dev);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nh->nh_flags & RTNH_F_LINKDOWN &&
+ !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+ continue;
+ }
if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
if (flp->flowi4_oif &&
flp->flowi4_oif != nh->nh_oif)
--
2.11.0
next prev parent reply other threads:[~2018-09-01 5:20 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-01 0:49 [PATCH RFC net-next 00/18] net: Improve route scalability via support for nexthop objects dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 01/18] net: Rename net/nexthop.h net/rtnh.h dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 02/18] net: ipv4: export fib_good_nh and fib_flush dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 03/18] net/ipv4: export fib_info_update_nh_saddr dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 04/18] net/ipv4: export fib_check_nh dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 05/18] net/ipv4: Define fib_get_nhs when CONFIG_IP_ROUTE_MULTIPATH is disabled dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 06/18] net/ipv4: Create init and release helpers for fib_nh dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 07/18] net: ipv4: Add fib_nh to fib_result dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 08/18] net/ipv4: Move device validation to helper dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 09/18] net/ipv6: Create init and release helpers for fib6_nh dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 10/18] net/ipv6: Make fib6_nh optional at the end of fib6_info dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 11/18] net: Initial nexthop code dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 12/18] net/ipv4: Add nexthop helpers for ipv4 integration dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 13/18] net/ipv4: Convert existing use of fib_info to new helpers dsahern
2018-09-01 0:49 ` dsahern [this message]
2018-09-01 0:49 ` [PATCH RFC net-next 15/18] net/ipv6: Use helpers to access fib6_nh data dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 16/18] net/ipv6: Allow routes to use nexthop objects dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 17/18] net: Add support for nexthop groups dsahern
2018-09-01 0:49 ` [PATCH RFC net-next 18/18] net/ipv4: Optimization for fib_info lookup dsahern
2018-09-01 20:43 ` Stephen Hemminger
2018-09-04 15:27 ` David Ahern
2018-09-01 0:49 ` [PATCH iproute2-next] ip: Add support for nexthop objects dsahern
2018-09-01 20:37 ` Stephen Hemminger
2018-09-04 15:30 ` David Ahern
2018-09-02 17:34 ` [PATCH RFC net-next 00/18] net: Improve route scalability via " David Miller
2018-09-04 15:57 ` David Ahern
2018-12-11 12:52 ` Jan Maria Matejka
2018-12-12 20:27 ` David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180901004954.7145-15-dsahern@kernel.org \
--to=dsahern@kernel.org \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=idosch@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=roopa@cumulusnetworks.com \
--cc=sharpd@cumulusnetworks.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.