* [PATCH RFC net-next 01/27] ipv4: Use accessors for fib_info nexthop data
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 02/27] ipv4: Prepare for fib6_nh from a nexthop object David Ahern
` (25 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Use helpers to access fib_nh and fib_nhs fields of a fib_info. Drop the
fib_dev macro which is an alias for the first nexthop. Replacements:
fi->fib_dev --> fib_info_nh(fi, 0)->fib_nh_dev
fi->fib_nh --> fib_info_nh(fi, 0)
fi->fib_nh[i] --> fib_info_nh(fi, i)
fi->fib_nhs --> fib_info_num_path(fi)
where fib_info_nh(fi, i) returns fi->fib_nh[nhsel] and fib_info_num_path
returns fi->fib_nhs.
Move the existing fib_info_nhc to nexthop.h and define the new ones
there. A later patch adds a check if a fib_info uses a nexthop object,
and defining the helpers in nexthop.h avoid circular header
dependencies.
After this all remaining open coded references to fi->fib_nhs and
fi->fib_nh are in:
- fib_create_info and helpers used to lookup an existing fib_info
entry, and
- the netdev event functions fib_sync_down_dev and fib_sync_up.
The latter two will not be reused for nexthops, and the fib_create_info
will be updated to handle a nexthop in a fib_info.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 29 ++++++----
.../net/ethernet/mellanox/mlxsw/spectrum_router.c | 19 ++++---
drivers/net/ethernet/rocker/rocker_ofdpa.c | 25 +++++---
include/net/ip_fib.h | 6 --
include/net/nexthop.h | 15 +++++
net/core/filter.c | 3 +-
net/ipv4/fib_frontend.c | 11 ++--
net/ipv4/fib_lookup.h | 1 +
net/ipv4/fib_rules.c | 8 ++-
net/ipv4/fib_semantics.c | 66 ++++++++++++----------
net/ipv4/fib_trie.c | 26 +++++----
net/ipv4/route.c | 3 +-
12 files changed, 132 insertions(+), 80 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 8212bfd05733..2cbfaa8da7fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/netdevice.h>
+#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
@@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct fib_info *fi)
{
struct lag_mp *mp = &ldev->lag_mp;
+ struct fib_nh *fib_nh0, *fib_nh1;
+ unsigned int nhs;
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
@@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
/* Handle add/replace event */
- if (fi->fib_nhs == 1) {
+ nhs = fib_info_num_path(fi);
+ if (nhs == 1) {
if (__mlx5_lag_is_active(ldev)) {
- struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev;
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct net_device *nh_dev = nh->fib_nh_dev;
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
mlx5_lag_set_port_affinity(ldev, ++i);
@@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
return;
}
- if (fi->fib_nhs != 2)
+ if (nhs != 2)
return;
/* Verify next hops are ports of the same hca */
- if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev &&
- fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) &&
- !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev &&
- fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) {
+ fib_nh0 = fib_info_nh(fi, 0);
+ fib_nh1 = fib_info_nh(fi, 1);
+ if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
+ !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
return;
}
@@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
mlx5_lag_set_port_affinity(ldev, i);
}
} else if (event == FIB_EVENT_NH_ADD &&
- fi->fib_nhs == 2) {
+ fib_info_num_path(fi) == 2) {
mlx5_lag_set_port_affinity(ldev, 0);
}
}
@@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
struct mlx5_fib_event_work *fib_work;
struct fib_entry_notifier_info *fen_info;
struct fib_nh_notifier_info *fnh_info;
+ struct net_device *fib_dev;
struct fib_info *fi;
if (info->family != AF_INET)
@@ -254,8 +262,9 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
- if (fi->fib_dev != ldev->pf[0].netdev &&
- fi->fib_dev != ldev->pf[1].netdev) {
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev != ldev->pf[0].netdev &&
+ fib_dev != ldev->pf[1].netdev) {
return NOTIFY_DONE;
}
fib_work = mlx5_lag_init_fib_work(ldev, event);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0ec52be7cc33..4f781358aef1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -21,6 +21,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
@@ -3816,23 +3817,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
}
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
- const struct fib_info *fi)
+ struct fib_info *fi)
{
- return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK ||
- mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
+ const struct fib_nh *nh = fib_info_nh(fi, 0);
+
+ return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
+ unsigned int nhs = fib_info_num_path(fi);
struct mlxsw_sp_nexthop_group *nh_grp;
struct mlxsw_sp_nexthop *nh;
struct fib_nh *fib_nh;
int i;
int err;
- nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
- GFP_KERNEL);
+ nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
if (!nh_grp)
return ERR_PTR(-ENOMEM);
nh_grp->priv = fi;
@@ -3840,11 +3843,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
nh_grp->neigh_tbl = &arp_tbl;
nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
- nh_grp->count = fi->fib_nhs;
+ nh_grp->count = nhs;
fib_info_hold(fi);
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
- fib_nh = &fi->fib_nh[i];
+ fib_nh = fib_info_nh(fi, i);
err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
if (err)
goto err_nexthop4_init;
@@ -4282,9 +4285,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
const struct fib_entry_notifier_info *fen_info,
struct mlxsw_sp_fib_entry *fib_entry)
{
+ struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
- struct net_device *dev = fen_info->fi->fib_dev;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 30a49802fb51..47ed9d41047f 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -22,6 +22,7 @@
#include <net/neighbour.h>
#include <net/switchdev.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/arp.h>
#include "rocker.h"
@@ -2286,8 +2287,8 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, __be32 dst,
/* XXX support ECMP */
- nh = fi->fib_nh;
- nh_on_port = (fi->fib_dev == ofdpa_port->dev);
+ nh = fib_info_nh(fi, 0);
+ nh_on_port = (nh->fib_nh_dev == ofdpa_port->dev);
has_gw = !!nh->fib_nh_gw4;
if (has_gw && nh_on_port) {
@@ -2737,11 +2738,13 @@ static int ofdpa_fib4_add(struct rocker *rocker,
{
struct ofdpa *ofdpa = rocker->wpriv;
struct ofdpa_port *ofdpa_port;
+ struct fib_nh *nh;
int err;
if (ofdpa->fib_aborted)
return 0;
- ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+ nh = fib_info_nh(fen_info->fi, 0);
+ ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
if (!ofdpa_port)
return 0;
err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
@@ -2749,7 +2752,7 @@ static int ofdpa_fib4_add(struct rocker *rocker,
fen_info->tb_id, 0);
if (err)
return err;
- fen_info->fi->fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
+ nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return 0;
}
@@ -2758,13 +2761,15 @@ static int ofdpa_fib4_del(struct rocker *rocker,
{
struct ofdpa *ofdpa = rocker->wpriv;
struct ofdpa_port *ofdpa_port;
+ struct fib_nh *nh;
if (ofdpa->fib_aborted)
return 0;
- ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+ nh = fib_info_nh(fen_info->fi, 0);
+ ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
if (!ofdpa_port)
return 0;
- fen_info->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
+ nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
fen_info->dst_len, fen_info->fi,
fen_info->tb_id, OFDPA_OP_FLAG_REMOVE);
@@ -2784,14 +2789,16 @@ static void ofdpa_fib4_abort(struct rocker *rocker)
spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags);
hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) {
+ struct fib_nh *nh;
+
if (flow_entry->key.tbl_id !=
ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING)
continue;
- ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev,
- rocker);
+ nh = fib_info_nh(flow_entry->fi, 0);
+ ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
if (!ofdpa_port)
continue;
- flow_entry->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
+ nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE,
flow_entry);
}
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 70ba0302c8c9..42b1a806f6f5 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -153,7 +153,6 @@ struct fib_info {
bool nh_updated;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
-#define fib_dev fib_nh[0].fib_nh_dev
};
@@ -190,11 +189,6 @@ struct fib_result_nl {
int err;
};
-static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
-{
- return &fi->fib_nh[nhsel].nh_common;
-}
-
#ifdef CONFIG_IP_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
#else
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 6e1b8f53624c..e501d77b82c8 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -192,4 +192,19 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh)
nhi = rcu_dereference_rtnl(nh->nh_info);
return nhi->reject_nh;
}
+
+static inline unsigned int fib_info_num_path(const struct fib_info *fi)
+{
+ return fi->fib_nhs;
+}
+
+static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
+{
+ return &fi->fib_nh[nhsel].nh_common;
+}
+
+static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
+{
+ return &fi->fib_nh[nhsel];
+}
#endif
diff --git a/net/core/filter.c b/net/core/filter.c
index 55bfc941d17a..2ae72bbfa6d2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -66,6 +66,7 @@
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
@@ -4674,7 +4675,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (res.type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
- if (res.fi->fib_nhs > 1)
+ if (fib_info_num_path(res.fi) > 1)
fib_select_path(net, &res, &fl4, NULL);
if (check_mtu) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 76055c66326a..ab369959ce0b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -43,6 +43,7 @@
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/l3mdev.h>
@@ -234,7 +235,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
if (table) {
ret = RTN_UNICAST;
if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
- if (!dev || dev == res.fi->fib_dev)
+ struct fib_nh *nh = fib_info_nh(res.fi, 0);
+
+ if (!dev || dev == nh->fib_nh_dev)
ret = res.type;
}
}
@@ -321,8 +324,8 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int ret;
- for (ret = 0; ret < fi->fib_nhs; ret++) {
- struct fib_nh *nh = &fi->fib_nh[ret];
+ for (ret = 0; ret < fib_info_num_path(fi); ret++) {
+ const struct fib_nh *nh = fib_info_nh(fi, ret);
if (nh->fib_nh_dev == dev) {
dev_match = true;
@@ -333,7 +336,7 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
}
}
#else
- if (fi->fib_nh[0].fib_nh_dev == dev)
+ if (fib_info_nh(fi, 0)->fib_nh_dev == dev)
dev_match = true;
#endif
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 7945f0534db7..a68b5e21ec51 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
struct fib_alias {
struct hlist_node fa_list;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index cfec3af54c8d..ab06fd73b343 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -31,6 +31,7 @@
#include <net/route.h>
#include <net/tcp.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/fib_rules.h>
struct fib4_rule {
@@ -145,8 +146,11 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
struct fib_result *result = (struct fib_result *) arg->result;
struct net_device *dev = NULL;
- if (result->fi)
- dev = result->fi->fib_dev;
+ if (result->fi) {
+ struct fib_nh *nh = fib_info_nh(result->fi, 0);
+
+ dev = nh->fib_nh_dev;
+ }
/* do not accept result if the route does
* not meet the required prefix length
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 78648072783e..a37ff07718a8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,6 +42,7 @@
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
@@ -65,13 +66,13 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
for (nhsel = 0, nh = (fi)->fib_nh; \
- nhsel < (fi)->fib_nhs; \
+ nhsel < fib_info_num_path((fi)); \
nh++, nhsel++)
#define change_nexthops(fi) { \
int nhsel; struct fib_nh *nexthop_nh; \
for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
- nhsel < (fi)->fib_nhs; \
+ nhsel < fib_info_num_path((fi)); \
nexthop_nh++, nhsel++)
#else /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -271,11 +272,13 @@ void fib_release_info(struct fib_info *fi)
spin_unlock_bh(&fib_info_lock);
}
-static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
{
- const struct fib_nh *onh = ofi->fib_nh;
+ const struct fib_nh *onh;
for_nexthops(fi) {
+ onh = fib_info_nh(ofi, nhsel);
+
if (nh->fib_nh_oif != onh->fib_nh_oif ||
nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
nh->fib_nh_scope != onh->fib_nh_scope ||
@@ -296,8 +299,6 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
if (nh->fib_nh_gw_family == AF_INET6 &&
ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
return -1;
-
- onh++;
} endfor_nexthops(fi);
return 0;
}
@@ -326,7 +327,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
-static struct fib_info *fib_find_info(const struct fib_info *nfi)
+static struct fib_info *fib_find_info(struct fib_info *nfi)
{
struct hlist_head *head;
struct fib_info *fi;
@@ -390,13 +391,14 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
+ nla_total_size(4) /* RTA_PRIORITY */
+ nla_total_size(4) /* RTA_PREFSRC */
+ nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+ unsigned int nhs = fib_info_num_path(fi);
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
- if (fi->fib_nhs) {
+ if (nhs) {
size_t nh_encapsize = 0;
- /* Also handles the special case fib_nhs == 1 */
+ /* Also handles the special case nhs == 1 */
/* each nexthop is packed in an attribute */
size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
@@ -416,8 +418,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
} endfor_nexthops(fi);
/* all nexthops are packed in a nested attribute */
- payload += nla_total_size((fi->fib_nhs * nhsize) +
- nh_encapsize);
+ payload += nla_total_size((nhs * nhsize) + nh_encapsize);
}
@@ -584,6 +585,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
{
struct net *net = fi->fib_net;
struct fib_config fib_cfg;
+ struct fib_nh *nh;
int ret;
change_nexthops(fi) {
@@ -646,24 +648,25 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
} endfor_nexthops(fi);
ret = -EINVAL;
- if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) {
+ nh = fib_info_nh(fi, 0);
+ if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
NL_SET_ERR_MSG(extack,
"Nexthop device index does not match RTA_OIF");
goto errout;
}
if (cfg->fc_gw_family) {
- if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
+ if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
(cfg->fc_gw_family == AF_INET &&
- fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
+ nh->fib_nh_gw4 != cfg->fc_gw4) ||
(cfg->fc_gw_family == AF_INET6 &&
- ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
+ ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
NL_SET_ERR_MSG(extack,
"Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
goto errout;
}
}
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+ if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
NL_SET_ERR_MSG(extack,
"Nexthop class id does not match RTA_FLOW");
goto errout;
@@ -679,7 +682,7 @@ static void fib_rebalance(struct fib_info *fi)
int total;
int w;
- if (fi->fib_nhs < 2)
+ if (fib_info_num_path(fi) < 2)
return;
total = 0;
@@ -761,27 +764,29 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
return 1;
if (cfg->fc_oif || cfg->fc_gw_family) {
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
- fi->fib_nh, cfg, extack))
+ nh, cfg, extack))
return 1;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow &&
- cfg->fc_flow != fi->fib_nh->nh_tclassid)
+ cfg->fc_flow != nh->nh_tclassid)
return 1;
#endif
- if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
+ if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) ||
(cfg->fc_gw_family &&
- cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
+ cfg->fc_gw_family != nh->fib_nh_gw_family))
return 1;
if (cfg->fc_gw_family == AF_INET &&
- cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
+ cfg->fc_gw4 != nh->fib_nh_gw4)
return 1;
if (cfg->fc_gw_family == AF_INET6 &&
- ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
+ ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6))
return 1;
return 0;
@@ -1366,7 +1371,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
nh->fib_nh_scope = RT_SCOPE_NOWHERE;
- nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif);
+ nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
err = -ENODEV;
if (!nh->fib_nh_dev)
goto failure;
@@ -1583,6 +1588,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
{
+ unsigned int nhs = fib_info_num_path(fi);
struct nlmsghdr *nlh;
struct rtmsg *rtm;
@@ -1618,8 +1624,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
- if (fi->fib_nhs == 1) {
- struct fib_nh *nh = &fi->fib_nh[0];
+ if (nhs == 1) {
+ const struct fib_nh *nh = fib_info_nh(fi, 0);
unsigned char flags = 0;
if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
@@ -1838,6 +1844,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
+ struct fib_nh *nh;
if (fa->fa_slen != slen)
continue;
@@ -1859,8 +1866,9 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
- if (!next_fi->fib_nh[0].fib_nh_gw4 ||
- next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK)
+
+ nh = fib_info_nh(next_fi, 0);
+ if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
continue;
fib_alias_accessed(fa);
@@ -2024,7 +2032,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1) {
+ if (fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b53ecef89d59..5c8a4d21b8e0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1469,7 +1469,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+ for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
if (nhc->nhc_flags & RTNH_F_DEAD)
@@ -2717,14 +2717,18 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock();
}
-static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
{
unsigned int flags = 0;
if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
flags = RTF_REJECT;
- if (fi && fi->fib_nh->fib_nh_gw4)
- flags |= RTF_GATEWAY;
+ if (fi) {
+ const struct fib_nh *nh = fib_info_nh(fi, 0);
+
+ if (nh->fib_nh_gw4)
+ flags |= RTF_GATEWAY;
+ }
if (mask == htonl(0xFFFFFFFF))
flags |= RTF_HOST;
flags |= RTF_UP;
@@ -2755,7 +2759,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
prefix = htonl(l->key);
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
- const struct fib_info *fi = fa->fa_info;
+ struct fib_info *fi = fa->fa_info;
__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
@@ -2768,26 +2772,28 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
seq_setwidth(seq, 127);
- if (fi)
+ if (fi) {
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+
seq_printf(seq,
"%s\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
- fi->fib_dev ? fi->fib_dev->name : "*",
+ nh->fib_nh_dev ? nh->fib_nh_dev->name : "*",
prefix,
- fi->fib_nh->fib_nh_gw4, flags, 0, 0,
+ nh->fib_nh_gw4, flags, 0, 0,
fi->fib_priority,
mask,
(fi->fib_advmss ?
fi->fib_advmss + 40 : 0),
fi->fib_window,
fi->fib_rtt >> 3);
- else
+ } else {
seq_printf(seq,
"*\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
prefix, 0, flags, 0, 0, 0,
mask, 0, 0, 0);
-
+ }
seq_pad(seq, '\n');
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11ddc276776e..05a6a8ecb574 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -99,6 +99,7 @@
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
@@ -1950,7 +1951,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi && res->fi->fib_nhs > 1) {
+ if (res->fi && fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 02/27] ipv4: Prepare for fib6_nh from a nexthop object
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 01/27] ipv4: Use accessors for fib_info nexthop data David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 03/27] ipv4: Plumb support for nexthop object in a fib_info David Ahern
` (24 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Convert more IPv4 code to use fib_nh_common over fib_nh to enable routes
to use a fib6_nh based nexthop. In the end, only code not using a
nexthop object in a fib_info should directly access fib_nh in a fib_info
without checking the famiy and going through fib_nh_common. Those
functions will be marked when it is not directly evident.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip_fib.h | 15 +++++++++----
net/ipv4/fib_frontend.c | 12 +++++------
net/ipv4/fib_rules.c | 4 ++--
net/ipv4/fib_semantics.c | 55 +++++++++++++++++++++++++++++++++---------------
net/ipv4/fib_trie.c | 15 +++++++------
net/ipv4/nexthop.c | 3 ++-
net/ipv4/route.c | 2 +-
7 files changed, 69 insertions(+), 37 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 42b1a806f6f5..7da8ea784029 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -195,8 +195,8 @@ struct fib_result_nl {
#define FIB_TABLE_HASHSZ 2
#endif
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
- unsigned char scope);
+__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
+ unsigned char scope);
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
#define FIB_RES_NHC(res) ((res).nhc)
@@ -455,11 +455,18 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
struct fib_nh_common *nhc = res->nhc;
- struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_MULTIPLE_TABLES
u32 rtag;
#endif
- *itag = nh->nh_tclassid << 16;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ *itag = nh->nh_tclassid << 16;
+ } else {
+ *itag = 0;
+ }
+
#ifdef CONFIG_IP_MULTIPLE_TABLES
rtag = res->tclassid;
if (*itag == 0)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ab369959ce0b..8e49baa00d20 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -235,9 +235,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
if (table) {
ret = RTN_UNICAST;
if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
- struct fib_nh *nh = fib_info_nh(res.fi, 0);
+ struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
- if (!dev || dev == nh->fib_nh_dev)
+ if (!dev || dev == nhc->nhc_dev)
ret = res.type;
}
}
@@ -325,18 +325,18 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
int ret;
for (ret = 0; ret < fib_info_num_path(fi); ret++) {
- const struct fib_nh *nh = fib_info_nh(fi, ret);
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
- if (nh->fib_nh_dev == dev) {
+ if (nhc->nhc_dev == dev) {
dev_match = true;
break;
- } else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) {
+ } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) {
dev_match = true;
break;
}
}
#else
- if (fib_info_nh(fi, 0)->fib_nh_dev == dev)
+ if (fib_info_nhc(fi, 0)->nhc_dev == dev)
dev_match = true;
#endif
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ab06fd73b343..88807c138df4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -147,9 +147,9 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
struct net_device *dev = NULL;
if (result->fi) {
- struct fib_nh *nh = fib_info_nh(result->fi, 0);
+ struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0);
- dev = nh->fib_nh_dev;
+ dev = nhc->nhc_dev;
}
/* do not accept result if the route does
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a37ff07718a8..4a12c69f7fa1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -61,6 +61,9 @@ static unsigned int fib_info_cnt;
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+/* for_nexthops and change_nexthops only used when nexthop object
+ * is not set in a fib_info. The logic within can reference fib_nh.
+ */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
#define for_nexthops(fi) { \
@@ -402,20 +405,23 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
/* each nexthop is packed in an attribute */
size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+ unsigned int i;
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
/* grab encap info */
- for_nexthops(fi) {
- if (nh->fib_nh_lws) {
+ for (i = 0; i < fib_info_num_path(fi); i++) {
+ struct fib_nh_common *nhc = fib_info_nhc(fi, i);
+
+ if (nhc->nhc_lwtstate) {
/* RTA_ENCAP_TYPE */
nh_encapsize += lwtunnel_get_encap_size(
- nh->fib_nh_lws);
+ nhc->nhc_lwtstate);
/* RTA_ENCAP */
nh_encapsize += nla_total_size(2);
}
- } endfor_nexthops(fi);
+ }
/* all nexthops are packed in a nested attribute */
payload += nla_total_size((nhs * nhsize) + nh_encapsize);
@@ -1194,9 +1200,15 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
fib_info_hash_free(old_laddrhash, bytes);
}
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
- unsigned char scope)
+__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
+ unsigned char scope)
{
+ struct fib_nh *nh;
+
+ if (nhc->nhc_family != AF_INET)
+ return inet_select_addr(nhc->nhc_dev, 0, scope);
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
@@ -1206,16 +1218,19 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
{
struct fib_nh_common *nhc = res->nhc;
- struct fib_nh *nh;
if (res->fi->fib_prefsrc)
return res->fi->fib_prefsrc;
- nh = container_of(nhc, struct fib_nh, nh_common);
- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
- return nh->nh_saddr;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
+ return nh->nh_saddr;
+ }
- return fib_info_update_nh_saddr(net, nh, res->fi->fib_scope);
+ return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
}
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
@@ -1397,7 +1412,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
change_nexthops(fi) {
- fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
+ fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
+ fi->fib_scope);
if (nexthop_nh->fib_nh_gw_family == AF_INET6)
fi->fib_nh_is_v6 = true;
} endfor_nexthops(fi)
@@ -1625,17 +1641,22 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
if (nhs == 1) {
- const struct fib_nh *nh = fib_info_nh(fi, 0);
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
unsigned char flags = 0;
- if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
+ if (fib_nexthop_info(skb, nhc, &flags, false) < 0)
goto nla_put_failure;
rtm->rtm_flags = flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid &&
- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
- goto nla_put_failure;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ if (nh->nh_tclassid &&
+ nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+ goto nla_put_failure;
+ }
#endif
} else {
if (fib_add_multipath(skb, fi) < 0)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5c8a4d21b8e0..d704d1606b8f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2724,9 +2724,9 @@ static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
flags = RTF_REJECT;
if (fi) {
- const struct fib_nh *nh = fib_info_nh(fi, 0);
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
- if (nh->fib_nh_gw4)
+ if (nhc->nhc_gw.ipv4)
flags |= RTF_GATEWAY;
}
if (mask == htonl(0xFFFFFFFF))
@@ -2773,14 +2773,17 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
seq_setwidth(seq, 127);
if (fi) {
- struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+ __be32 gw = 0;
+
+ if (nhc->nhc_gw_family == AF_INET)
+ gw = nhc->nhc_gw.ipv4;
seq_printf(seq,
"%s\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
- nh->fib_nh_dev ? nh->fib_nh_dev->name : "*",
- prefix,
- nh->fib_nh_gw4, flags, 0, 0,
+ nhc->nhc_dev ? nhc->nhc_dev->name : "*",
+ prefix, gw, flags, 0, 0,
fi->fib_priority,
mask,
(fi->fib_advmss ?
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 7a5a3d08fec3..aec4ecb145a0 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -815,7 +815,8 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
if (!err) {
nh->nh_flags = fib_nh->fib_nh_flags;
- fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope);
+ fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
+ fib_nh->fib_nh_scope);
} else {
fib_nh_release(net, fib_nh);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 05a6a8ecb574..4a1168451f3a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1585,7 +1585,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
#ifdef CONFIG_IP_ROUTE_CLASSID
- {
+ if (nhc->nhc_family == AF_INET) {
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 03/27] ipv4: Plumb support for nexthop object in a fib_info
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 01/27] ipv4: Use accessors for fib_info nexthop data David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 02/27] ipv4: Prepare for fib6_nh from a nexthop object David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 04/27] ipv6: Plumb support for nexthop object in a fib6_info David Ahern
` (23 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add 'struct nexthop' and nh_list list_head to fib_info. nh_list is the
fib_info side of the nexthop <-> fib_info relationship.
Add fi_list list_head to 'struct nexthop' to track fib_info entries
using a nexthop instance. Add __remove_nexthop_fib and add it to
__remove_nexthop to walk the new list_head and mark those fib entries
as dead when the nexthop is deleted.
Add a few nexthop helpers for use when a nexthop is added to fib_info.
Update the fib_info functions to check for fi->nh and take a different
path as needed.
The bulk of the changes are in fib_semantics.c and most of that is
moving the existing change_nexthops into an else branch.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip_fib.h | 4 ++
include/net/nexthop.h | 48 ++++++++++++++++
net/ipv4/fib_semantics.c | 142 +++++++++++++++++++++++++++++++++++------------
net/ipv4/fib_trie.c | 7 +++
net/ipv4/nexthop.c | 64 +++++++++++++++++++++
5 files changed, 229 insertions(+), 36 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 7da8ea784029..071d280de389 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -129,9 +129,12 @@ struct fib_nh {
* This structure contains data shared by many of routes.
*/
+struct nexthop;
+
struct fib_info {
struct hlist_node fib_hash;
struct hlist_node fib_lhash;
+ struct list_head nh_list;
struct net *fib_net;
int fib_treeref;
refcount_t fib_clntref;
@@ -151,6 +154,7 @@ struct fib_info {
int fib_nhs;
bool fib_nh_is_v6;
bool nh_updated;
+ struct nexthop *nh;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
};
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index e501d77b82c8..2912a2d7a515 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -77,6 +77,7 @@ struct nh_group {
struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
+ struct list_head fi_list; /* v4 entries using nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;
@@ -110,6 +111,12 @@ static inline void nexthop_put(struct nexthop *nh)
call_rcu(&nh->rcu, nexthop_free_rcu);
}
+static inline bool nexthop_cmp(const struct nexthop *nh1,
+ const struct nexthop *nh2)
+{
+ return nh1 == nh2;
+}
+
static inline bool nexthop_is_multipath(const struct nexthop *nh)
{
if (nh->is_group) {
@@ -193,18 +200,59 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh)
return nhi->reject_nh;
}
+static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
+{
+ struct nh_info *nhi;
+ struct nexthop *nh;
+
+ nh = nexthop_select_path(res->fi->nh, hash);
+ nhi = rcu_dereference(nh->nh_info);
+ res->nhc = &nhi->fib_nhc;
+}
+
+/* called with rcu read lock or rtnl held */
+static inline
+struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
+{
+ struct nh_info *nhi;
+
+ BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
+ BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
+
+ if (nexthop_is_multipath(nh)) {
+ nh = nexthop_mpath_select(nh, nhsel);
+ if (!nh)
+ return NULL;
+ }
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ return &nhi->fib_nhc;
+}
+
static inline unsigned int fib_info_num_path(const struct fib_info *fi)
{
+ if (unlikely(fi->nh))
+ return nexthop_num_path(fi->nh);
+
return fi->fib_nhs;
}
+int fib_check_nexthop(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack);
+
static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
{
+ if (unlikely(fi->nh))
+ return nexthop_fib_nhc(fi->nh, nhsel);
+
return &fi->fib_nh[nhsel].nh_common;
}
+/* only used when fib_nh is built into fib_info */
static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
{
+ WARN_ON(fi->nh);
+
return &fi->fib_nh[nhsel];
}
#endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4a12c69f7fa1..01e587a5dcb1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -236,9 +236,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
- change_nexthops(fi) {
- fib_nh_release(fi->fib_net, nexthop_nh);
- } endfor_nexthops(fi);
+ if (fi->nh) {
+ nexthop_put(fi->nh);
+ } else {
+ change_nexthops(fi) {
+ fib_nh_release(fi->fib_net, nexthop_nh);
+ } endfor_nexthops(fi);
+ }
ip_fib_metrics_put(fi->fib_metrics);
@@ -264,11 +268,15 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
- change_nexthops(fi) {
- if (!nexthop_nh->fib_nh_dev)
- continue;
- hlist_del(&nexthop_nh->nh_hash);
- } endfor_nexthops(fi)
+ if (fi->nh) {
+ list_del(&fi->nh_list);
+ } else {
+ change_nexthops(fi) {
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+ hlist_del(&nexthop_nh->nh_hash);
+ } endfor_nexthops(fi)
+ }
fi->fib_dead = 1;
fib_info_put(fi);
}
@@ -279,6 +287,12 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
{
const struct fib_nh *onh;
+ if (fi->nh || ofi->nh)
+ return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+ if (ofi->fib_nhs == 0)
+ return 0;
+
for_nexthops(fi) {
onh = fib_info_nh(ofi, nhsel);
@@ -323,9 +337,14 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
val ^= (fi->fib_protocol << 8) | fi->fib_scope;
val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
- for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->fib_nh_oif);
- } endfor_nexthops(fi)
+
+ if (fi->nh) {
+ val ^= fib_devindex_hashfn(fi->nh->id);
+ } else {
+ for_nexthops(fi) {
+ val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+ } endfor_nexthops(fi)
+ }
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
@@ -352,7 +371,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
- (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+ nh_comp(fi, nfi) == 0)
return fi;
}
@@ -399,6 +418,9 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
+ if (fi->nh)
+ payload += nla_total_size(4); /* RTA_NH_ID */
+
if (nhs) {
size_t nh_encapsize = 0;
/* Also handles the special case nhs == 1 */
@@ -585,6 +607,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
return nhs;
}
+/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg,
struct netlink_ext_ack *extack)
@@ -683,6 +706,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
return ret;
}
+/* only called when fib_nh is integrated into fib_info */
static void fib_rebalance(struct fib_info *fi)
{
int total;
@@ -1262,6 +1286,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
{
int err;
struct fib_info *fi = NULL;
+ struct nexthop *nh = NULL;
struct fib_info *ofi;
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1333,14 +1358,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi->fib_tb_id = cfg->fc_table;
fi->fib_nhs = nhs;
- change_nexthops(fi) {
- nexthop_nh->nh_parent = fi;
- } endfor_nexthops(fi)
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -EINVAL;
+ } else {
+ err = 0;
+ fi->nh = nh;
+ }
+ } else {
+ change_nexthops(fi) {
+ nexthop_nh->nh_parent = fi;
+ } endfor_nexthops(fi)
- if (cfg->fc_mp)
- err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
- else
- err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
+ if (cfg->fc_mp)
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg,
+ extack);
+ else
+ err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
+ }
if (err != 0)
goto failure;
@@ -1371,7 +1407,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- if (cfg->fc_scope == RT_SCOPE_HOST) {
+ if (fi->nh) {
+ err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
+ if (err)
+ goto failure;
+ } else if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -1411,14 +1451,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- change_nexthops(fi) {
- fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
- fi->fib_scope);
- if (nexthop_nh->fib_nh_gw_family == AF_INET6)
- fi->fib_nh_is_v6 = true;
- } endfor_nexthops(fi)
+ if (!fi->nh) {
+ change_nexthops(fi) {
+ fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
+ fi->fib_scope);
+ if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+ fi->fib_nh_is_v6 = true;
+ } endfor_nexthops(fi)
- fib_rebalance(fi);
+ fib_rebalance(fi);
+ }
link_it:
ofi = fib_find_info(fi);
@@ -1440,16 +1482,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
hlist_add_head(&fi->fib_lhash, head);
}
- change_nexthops(fi) {
- struct hlist_head *head;
- unsigned int hash;
+ if (fi->nh) {
+ list_add(&fi->nh_list, &nh->fi_list);
+ } else {
+ change_nexthops(fi) {
+ struct hlist_head *head;
+ unsigned int hash;
- if (!nexthop_nh->fib_nh_dev)
- continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_add_head(&nexthop_nh->nh_hash, head);
- } endfor_nexthops(fi)
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+ hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
+ head = &fib_info_devhash[hash];
+ hlist_add_head(&nexthop_nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ }
spin_unlock_bh(&fib_info_lock);
return fi;
@@ -1576,6 +1622,12 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
if (!mp)
goto nla_put_failure;
+ if (unlikely(fi->nh)) {
+ if (nexthop_mpath_fill_node(skb, fi->nh) < 0)
+ goto nla_put_failure;
+ goto mp_end;
+ }
+
for_nexthops(fi) {
if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
goto nla_put_failure;
@@ -1586,6 +1638,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
} endfor_nexthops(fi);
+mp_end:
nla_nest_end(skb, mp);
return 0;
@@ -1640,6 +1693,14 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
+
+ if (fi->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+ goto nla_put_failure;
+ if (nexthop_is_blackhole(fi->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+ }
+
if (nhs == 1) {
const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
unsigned char flags = 0;
@@ -1784,6 +1845,8 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
* NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed
* NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed
+ *
+ * only used when fib_nh is built into fib_info
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
@@ -1931,6 +1994,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
/*
* Dead device goes up. We wake up dead nexthops.
* It takes sense only on multipath routes.
+ *
+ * only used when fib_nh is built into fib_info
*/
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
@@ -2025,6 +2090,11 @@ void fib_select_multipath(struct fib_result *res, int hash)
struct net *net = fi->fib_net;
bool first = false;
+ if (unlikely(res->fi->nh)) {
+ nexthop_path_fib_result(res, hash);
+ return;
+ }
+
change_nexthops(fi) {
if (net->ipv4.sysctl_fib_multipath_use_neigh) {
if (!fib_good_nh(nexthop_nh))
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d704d1606b8f..716f2d66cb3f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1461,6 +1461,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (unlikely(err < 0)) {
+out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
@@ -1469,6 +1470,12 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
+
+ if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
+ err = fib_props[RTN_BLACKHOLE].error;
+ goto out_reject;
+ }
+
for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index aec4ecb145a0..63cbb04f697f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -105,6 +105,7 @@ static struct nexthop *nexthop_alloc(void)
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
if (nh) {
+ INIT_LIST_HEAD(&nh->fi_list);
INIT_LIST_HEAD(&nh->grp_list);
}
return nh;
@@ -515,6 +516,54 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
+static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ nhi = rtnl_dereference(nh->nh_info);
+ if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have a gateway");
+ return -EINVAL;
+ }
+
+ if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Invoked by fib add code to verify nexthop by id is ok with
+ * config for prefix; parts of fib_check_nh not done when nexthop
+ * object is used.
+ */
+int fib_check_nexthop(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+
+ if (scope == RT_SCOPE_HOST) {
+ NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
+ err = -EINVAL;
+ goto out;
+ }
+
+ nhg = rtnl_dereference(nh->nh_grp);
+ /* all nexthops in a group have the same scope */
+ err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+ } else {
+ err = nexthop_check_scope(nh, scope, extack);
+ }
+out:
+ return err;
+}
+
static void nh_group_rebalance(struct nh_group *nhg)
{
int total = 0;
@@ -607,9 +656,24 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
}
}
+static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
+{
+ bool do_flush = false;
+ struct fib_info *fi;
+
+ list_for_each_entry(fi, &nh->fi_list, nh_list) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ do_flush = true;
+ }
+ if (do_flush)
+ fib_flush(net);
+}
+
static void __remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
+ __remove_nexthop_fib(net, nh);
+
if (nh->is_group) {
remove_nexthop_group(nh, nlinfo);
} else {
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 04/27] ipv6: Plumb support for nexthop object in a fib6_info
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (2 preceding siblings ...)
2019-06-01 3:35 ` [PATCH RFC net-next 03/27] ipv4: Plumb support for nexthop object in a fib_info David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 05/27] mlxsw: Fail attempts to use routes with nexthop objects David Ahern
` (22 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add struct nexthop and nh_list list_head to fib6_info. nh_list is the
fib6_info side of the nexthop <-> fib_info relationship. Since a fib6_info
referencing a nexthop object can not have 'sibling' entries (the old way
of doing multipath routes), the nh_list is a union with fib6_siblings.
Add f6i_list list_head to 'struct nexthop' to track fib6_info entries
using a nexthop instance. Update __remove_nexthop_fib to walk f6_list
and delete fib entries using the nexthop.
Add a few nexthop helpers for use when a nexthop is added to fib6_info.
Update the fib6_info references to check for nh and take a different path
as needed.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip6_fib.h | 11 ++--
include/net/ip6_route.h | 13 +++-
include/net/nexthop.h | 50 ++++++++++++++++
net/ipv4/nexthop.c | 44 ++++++++++++++
net/ipv6/addrconf.c | 5 ++
net/ipv6/ip6_fib.c | 22 +++++--
net/ipv6/ndisc.c | 3 +-
net/ipv6/route.c | 156 +++++++++++++++++++++++++++++++++++++++++-------
8 files changed, 268 insertions(+), 36 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index ebe5d65f97e0..1a8acd51b277 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -146,7 +146,10 @@ struct fib6_info {
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
- struct list_head fib6_siblings;
+ union {
+ struct list_head fib6_siblings;
+ struct list_head nh_list;
+ };
unsigned int fib6_nsiblings;
refcount_t fib6_ref;
@@ -170,6 +173,7 @@ struct fib6_info {
unused:3;
struct rcu_head rcu;
+ struct nexthop *nh;
struct fib6_nh fib6_nh[0];
};
@@ -441,11 +445,6 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
rcu_read_unlock();
}
-static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
-{
- return f6i->fib6_nh->fib_nh_dev;
-}
-
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a6ce6ea856b9..7375a165fd98 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -27,6 +27,7 @@ struct route_info {
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/route.h>
+#include <net/nexthop.h>
#define RT6_LOOKUP_F_IFACE 0x00000001
#define RT6_LOOKUP_F_REACHABLE 0x00000002
@@ -66,10 +67,13 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+/* fib entries using a nexthop object can not be coalesced into
+ * a multipath route
+ */
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
/* the RTF_ADDRCONF flag filters out RA's */
- return !(f6i->fib6_flags & RTF_ADDRCONF) &&
+ return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh &&
f6i->fib6_nh->fib_nh_gw_family;
}
@@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
- struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
+ struct fib6_nh *nha, *nhb;
+
+ if (a->nh || b->nh)
+ return nexthop_cmp(a->nh, b->nh);
+ nha = a->fib6_nh;
+ nhb = b->fib6_nh;
return nha->fib_nh_dev == nhb->fib_nh_dev &&
ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 2912a2d7a515..aff7b2410057 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H
#include <linux/netdevice.h>
+#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
@@ -78,6 +79,7 @@ struct nh_group {
struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
struct list_head fi_list; /* v4 entries using nh */
+ struct list_head f6i_list; /* v6 entries using nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;
@@ -255,4 +257,52 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
return &fi->fib_nh[nhsel];
}
+
+/*
+ * IPv6 variants
+ */
+int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
+ struct netlink_ext_ack *extack);
+
+static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
+{
+ struct nh_info *nhi;
+
+ if (nexthop_is_multipath(nh)) {
+ nh = nexthop_mpath_select(nh, 0);
+ if (!nh)
+ return NULL;
+ }
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ if (nhi->family == AF_INET6)
+ return &nhi->fib6_nh;
+
+ return NULL;
+}
+
+static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
+{
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
+ return fib6_nh->fib_nh_dev;
+}
+
+static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
+{
+ struct nexthop *nh = res->f6i->nh;
+ struct nh_info *nhi;
+
+ nh = nexthop_select_path(nh, hash);
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ if (nhi->reject_nh) {
+ res->fib6_type = RTN_BLACKHOLE;
+ res->fib6_flags |= RTF_REJECT;
+ res->nh = nexthop_fib6_nh(nh);
+ } else {
+ res->nh = &nhi->fib6_nh;
+ }
+}
#endif
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 63cbb04f697f..5e48762b6b5f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -106,6 +106,7 @@ static struct nexthop *nexthop_alloc(void)
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
if (nh) {
INIT_LIST_HEAD(&nh->fi_list);
+ INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
}
return nh;
@@ -516,6 +517,41 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
+int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ /* fib6_src is unique to a fib6_info and limits the ability to cache
+ * routes in fib6_nh within a nexthop that is potentially shared
+ * across multiple fib entries. If the config wants to use source
+ * routing it can not use nexthop objects. mlxsw also does not allow
+ * fib6_src on routes.
+ */
+ if (!ipv6_addr_any(&cfg->fc_src)) {
+ NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ return -EINVAL;
+ }
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+
+ nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->has_v4)
+ goto no_v4_nh;
+ } else {
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->family == AF_INET)
+ goto no_v4_nh;
+ }
+
+ return 0;
+no_v4_nh:
+ NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(fib6_check_nexthop);
+
static int nexthop_check_scope(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
@@ -658,6 +694,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
+ struct fib6_info *f6i, *tmp;
bool do_flush = false;
struct fib_info *fi;
@@ -667,6 +704,13 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
}
if (do_flush)
fib_flush(net);
+
+ /* ip6_del_rt removes the entry from this list hence the _safe */
+ list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
+ /* __ip6_del_rt does a release, so do a hold here */
+ fib6_info_hold(f6i);
+ ipv6_stub->ip6_del_rt(net, f6i);
+ }
}
static void __remove_nexthop(struct net *net, struct nexthop *nh,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6b673d4f5ca9..7549e779335d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2421,6 +2421,10 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
+ /* prefix routes only use builtin fib6_nh */
+ if (rt->nh)
+ continue;
+
if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
continue;
if (no_gw && rt->fib6_nh->fib_nh_gw_family)
@@ -6354,6 +6358,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
+ /* host routes only use builtin fib6_nh */
struct fib6_nh *nh = ifa->rt->fib6_nh;
int cpu;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cdfb8500ccae..02feda73a98e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -159,6 +159,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
if (!f6i)
return NULL;
+ /* fib6_siblings is a union with nh_list, so this initializes both */
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
@@ -171,7 +172,11 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
WARN_ON(f6i->fib6_node);
- fib6_nh_release(f6i->fib6_nh);
+ if (f6i->nh)
+ nexthop_put(f6i->nh);
+ else
+ fib6_nh_release(f6i->fib6_nh);
+
ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
@@ -927,6 +932,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
fib6_drop_pcpu_from(rt, table);
+ if (rt->nh && !list_empty(&rt->nh_list))
+ list_del_init(&rt->nh_list);
+
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -1334,6 +1342,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
err = fib6_add_rt2node(fn, rt, info, extack);
if (!err) {
+ if (rt->nh)
+ list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
}
@@ -2295,9 +2305,13 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
+ struct fib6_nh *fib6_nh = rt->fib6_nh;
unsigned int flags = rt->fib6_flags;
const struct net_device *dev;
+ if (rt->nh)
+ fib6_nh = nexthop_fib6_nh(rt->nh);
+
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
#ifdef CONFIG_IPV6_SUBTREES
@@ -2305,14 +2319,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh->fib_nh_gw_family) {
+ if (fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
- seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
+ seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}
- dev = rt->fib6_nh->fib_nh_dev;
+ dev = fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f874dde1ee85..6e3c51109c83 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1289,9 +1289,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
!in6_dev->cnf.accept_ra_rtr_pref)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
-
+ /* routes added from RAs do not use nexthop objects */
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
-
if (rt) {
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fada5a13bcb2..51cb5cb027ae 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -432,15 +432,21 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
- if (!match->fib6_nsiblings || have_oif_match)
+ if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
goto out;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
- if (!fl6->mp_hash)
+ if (!fl6->mp_hash &&
+ (!match->nh || nexthop_is_multipath(match->nh)))
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
+ if (unlikely(match->nh)) {
+ nexthop_path_fib6_result(res, fl6->mp_hash);
+ return;
+ }
+
if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
@@ -496,7 +502,13 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
struct fib6_nh *nh;
if (!oif && ipv6_addr_any(saddr)) {
- nh = f6i->fib6_nh;
+ if (unlikely(f6i->nh)) {
+ nh = nexthop_fib6_nh(f6i->nh);
+ if (nexthop_is_blackhole(f6i->nh))
+ goto out_blackhole;
+ } else {
+ nh = f6i->fib6_nh;
+ }
if (!(nh->fib_nh_flags & RTNH_F_DEAD))
goto out;
}
@@ -515,7 +527,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
goto out;
}
- nh = f6i->fib6_nh;
+ if (unlikely(f6i->nh)) {
+ nh = nexthop_fib6_nh(f6i->nh);
+ if (nexthop_is_blackhole(f6i->nh))
+ goto out_blackhole;
+ } else {
+ nh = f6i->fib6_nh;
+ }
+
if (nh->fib_nh_flags & RTNH_F_DEAD) {
res->f6i = net->ipv6.fib6_null_entry;
nh = res->f6i->fib6_nh;
@@ -524,6 +543,12 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
res->nh = nh;
res->fib6_type = res->f6i->fib6_type;
res->fib6_flags = res->f6i->fib6_flags;
+ return;
+
+out_blackhole:
+ res->fib6_flags |= RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->nh = nh;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -1117,6 +1142,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
goto out;
+ } else if (res.fib6_flags & RTF_REJECT) {
+ goto do_create;
}
fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
@@ -1128,6 +1155,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
} else {
+do_create:
rt = ip6_create_rt_rcu(&res);
}
@@ -1982,6 +2010,14 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_unlock();
dst_hold(&rt->dst);
return rt;
+ } else if (res.fib6_flags & RTF_REJECT) {
+ rt = ip6_create_rt_rcu(&res);
+ rcu_read_unlock();
+ if (!rt) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+ return rt;
}
fib6_select_path(net, &res, fl6, oif, false, skb, strict);
@@ -3217,7 +3253,9 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct fib6_info *rt = NULL;
+ struct nexthop *nh = NULL;
struct fib6_table *table;
+ struct fib6_nh *fib6_nh;
int err = -EINVAL;
int addr_type;
@@ -3270,7 +3308,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags, true);
+ rt = fib6_info_alloc(gfp_flags, !nh);
if (!rt)
goto out;
@@ -3310,19 +3348,35 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
- if (err)
- goto out;
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ goto out;
+ }
+ if (rt->fib6_src.plen) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used wtih source routing");
+ goto out;
+ }
+ rt->nh = nh;
+ fib6_nh = nexthop_fib6_nh(rt->nh);
+ } else {
+ err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
- /* We cannot add true routes via loopback here,
- * they would result in kernel looping; promote them to reject routes
- */
- addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type))
- rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
+ fib6_nh = rt->fib6_nh;
+
+ /* We cannot add true routes via loopback here, they would
+ * result in kernel looping; promote them to reject routes
+ */
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
+ addr_type))
+ rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
+ }
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
- struct net_device *dev = fib6_info_nh_dev(rt);
+ struct net_device *dev = fib6_nh->fib_nh_dev;
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -3678,6 +3732,9 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
+ /* these routes do not use nexthops */
+ if (rt->nh)
+ continue;
if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
@@ -3741,8 +3798,13 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- struct fib6_nh *nh = rt->fib6_nh;
+ struct fib6_nh *nh;
+
+ /* RA routes do not use nexthops */
+ if (rt->nh)
+ continue;
+ nh = rt->fib6_nh;
if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&nh->fib_nh_gw6, addr))
@@ -3993,7 +4055,8 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
+ if (!rt->nh &&
+ ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
@@ -4021,8 +4084,13 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- struct fib6_nh *nh = rt->fib6_nh;
+ struct fib6_nh *nh;
+ /* RA routes do not use nexthops */
+ if (rt->nh)
+ return 0;
+
+ nh = rt->fib6_nh;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
return -1;
@@ -4069,6 +4137,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
return NULL;
}
+/* only called for fib entries with builtin fib6_nh */
static bool rt6_is_dead(const struct fib6_info *rt)
{
if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
@@ -4147,7 +4216,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry &&
+ if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
rt->fib6_nh->fib_nh_dev == arg->dev) {
rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
@@ -4172,6 +4241,7 @@ void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
+/* only called for fib entries with inline fib6_nh */
static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
const struct net_device *dev)
{
@@ -4232,7 +4302,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
const struct net_device *dev = arg->dev;
struct net *net = dev_net(dev);
- if (rt == net->ipv6.fib6_null_entry)
+ if (rt == net->ipv6.fib6_null_entry || rt->nh)
return 0;
switch (arg->event) {
@@ -4786,6 +4856,9 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
{
int nexthop_len = 0;
+ if (rt->nh)
+ nexthop_len += nla_total_size(4); /* RTA_NH_ID */
+
if (rt->fib6_nsiblings) {
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
@@ -4812,6 +4885,35 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nexthop_len;
}
+static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
+ unsigned char *flags)
+{
+ if (nexthop_is_multipath(nh)) {
+ struct nlattr *mp;
+
+ mp = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp)
+ goto nla_put_failure;
+
+ if (nexthop_mpath_fill_node(skb, nh))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, mp);
+ } else {
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = nexthop_fib6_nh(nh);
+ if (fib_nexthop_info(skb, &fib6_nh->nh_common,
+ flags, false) < 0)
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -4821,6 +4923,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct rt6_info *rt6 = (struct rt6_info *)dst;
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
+ unsigned char nh_flags = 0;
struct nlmsghdr *nlh;
struct rtmsg *rtm;
long expires = 0;
@@ -4940,9 +5043,18 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
nla_nest_end(skb, mp);
- } else {
- unsigned char nh_flags = 0;
+ } else if (rt->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
+ goto nla_put_failure;
+
+ if (nexthop_is_blackhole(rt->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+ if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+ goto nla_put_failure;
+
+ rtm->rtm_flags |= nh_flags;
+ } else {
if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
&nh_flags, false) < 0)
goto nla_put_failure;
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 05/27] mlxsw: Fail attempts to use routes with nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (3 preceding siblings ...)
2019-06-01 3:35 ` [PATCH RFC net-next 04/27] ipv6: Plumb support for nexthop object in a fib6_info David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 06/27] mlx5: " David Ahern
` (21 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Fail attempts to use nexthop objects with routes until support can be
properly added.
Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4f781358aef1..23f17ea52061 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -6122,6 +6122,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
+ if (fen_info->fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ } else if (info->family == AF_INET6) {
+ struct fib6_entry_notifier_info *fen6_info;
+
+ fen6_info = container_of(info,
+ struct fib6_entry_notifier_info,
+ info);
+ if (fen6_info->rt->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
}
break;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 06/27] mlx5: Fail attempts to use routes with nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (4 preceding siblings ...)
2019-06-01 3:35 ` [PATCH RFC net-next 05/27] mlxsw: Fail attempts to use routes with nexthop objects David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 07/27] rocker: " David Ahern
` (20 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Fail attempts to use nexthop objects with routes until support can be
properly added.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 2cbfaa8da7fc..e69766393990 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -262,6 +262,10 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
+ if (fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
if (fib_dev != ldev->pf[0].netdev &&
fib_dev != ldev->pf[1].netdev) {
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 07/27] rocker: Fail attempts to use routes with nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (5 preceding siblings ...)
2019-06-01 3:35 ` [PATCH RFC net-next 06/27] mlx5: " David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:35 ` [PATCH RFC net-next 08/27] nexthops: Add ipv6 helper to walk all fib6_nh in a nexthop struct David Ahern
` (19 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Fail attempts to use nexthop objects with routes until support can be
properly added.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
drivers/net/ethernet/rocker/rocker_main.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 7ae6c124bfe9..45b3325c3a38 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2214,6 +2214,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
+ if (fen_info->fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
}
memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 08/27] nexthops: Add ipv6 helper to walk all fib6_nh in a nexthop struct
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (6 preceding siblings ...)
2019-06-01 3:35 ` [PATCH RFC net-next 07/27] rocker: " David Ahern
@ 2019-06-01 3:35 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 09/27] ipv6: Handle all fib6_nh in a nexthop in fib6_drop_pcpu_from David Ahern
` (18 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:35 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
IPv6 has traditionally had a single fib6_nh per fib6_info. With
nexthops we can have multiple fib6_nh associated with a fib6_info.
Add a nexthop helper to invoke a callback for each fib6_nh in a
'struct nexthop'. If the callback returns non-0, the loop is
stopped and the return value passed to the caller.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/nexthop.h | 4 ++++
net/ipv4/nexthop.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index aff7b2410057..448249968903 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -305,4 +305,8 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
res->nh = &nhi->fib6_nh;
}
}
+
+int nexthop_for_each_fib6_nh(struct nexthop *nh,
+ int (*cb)(struct fib6_nh *nh, void *arg),
+ void *arg);
#endif
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5e48762b6b5f..6d1c8927e6d3 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -517,6 +517,37 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
+int nexthop_for_each_fib6_nh(struct nexthop *nh,
+ int (*cb)(struct fib6_nh *nh, void *arg),
+ void *arg)
+{
+ struct nh_info *nhi;
+ int err;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+ int i;
+
+ nhg = rcu_dereference_rtnl(nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
+ err = cb(&nhi->fib6_nh, arg);
+ if (err)
+ return err;
+ }
+ } else {
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ err = cb(&nhi->fib6_nh, arg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
+
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 09/27] ipv6: Handle all fib6_nh in a nexthop in fib6_drop_pcpu_from
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (7 preceding siblings ...)
2019-06-01 3:35 ` [PATCH RFC net-next 08/27] nexthops: Add ipv6 helper to walk all fib6_nh in a nexthop struct David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 10/27] ipv6: Handle all fib6_nh in a nexthop in rt6_device_match David Ahern
` (17 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Use nexthop_for_each_fib6_nh to walk all fib6_nh in a nexthop when
dropping 'from' reference in pcpu routes.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/ip6_fib.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 02feda73a98e..fe326402cc0e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -910,19 +910,42 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
}
}
+struct fib6_nh_pcpu_arg {
+ struct fib6_info *from;
+ const struct fib6_table *table;
+};
+
+static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_pcpu_arg *arg = _arg;
+
+ __fib6_drop_pcpu_from(nh, arg->from, arg->table);
+ return 0;
+}
+
static void fib6_drop_pcpu_from(struct fib6_info *f6i,
const struct fib6_table *table)
{
- struct fib6_nh *fib6_nh;
-
/* Make sure rt6_make_pcpu_route() wont add other percpu routes
* while we are cleaning them here.
*/
f6i->fib6_destroying = 1;
mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
- fib6_nh = f6i->fib6_nh;
- __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ if (f6i->nh) {
+ struct fib6_nh_pcpu_arg arg = {
+ .from = f6i,
+ .table = table
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
+ &arg);
+ } else {
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = f6i->fib6_nh;
+ __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ }
}
static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 10/27] ipv6: Handle all fib6_nh in a nexthop in rt6_device_match
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (8 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 09/27] ipv6: Handle all fib6_nh in a nexthop in fib6_drop_pcpu_from David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 11/27] ipv6: Handle all fib6_nh in a nexthop in __find_rr_leaf David Ahern
` (16 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a hook in rt6_device_match to handle nexthop struct in a fib6_info.
The new rt6_nh_dev_match uses nexthop_for_each_fib6_nh to walk each
fib6_nh in a nexthop and call __rt6_device_match. On match,
rt6_nh_dev_match returns the fib6_nh and rt6_device_match uses it to
setup fib6_result.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 51cb5cb027ae..f15ba228beed 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -494,6 +494,45 @@ static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
return false;
}
+struct fib6_nh_dm_arg {
+ struct net *net;
+ const struct in6_addr *saddr;
+ int oif;
+ int flags;
+ struct fib6_nh *nh;
+};
+
+static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_dm_arg *arg = _arg;
+
+ arg->nh = nh;
+ return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
+ arg->flags);
+}
+
+/* returns fib6_nh from nexthop or NULL */
+static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
+ struct fib6_result *res,
+ const struct in6_addr *saddr,
+ int oif, int flags)
+{
+ struct fib6_nh_dm_arg arg = {
+ .net = net,
+ .saddr = saddr,
+ .oif = oif,
+ .flags = flags,
+ };
+
+ if (nexthop_is_blackhole(nh))
+ return NULL;
+
+ if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
+ return arg.nh;
+
+ return NULL;
+}
+
static void rt6_device_match(struct net *net, struct fib6_result *res,
const struct in6_addr *saddr, int oif, int flags)
{
@@ -514,8 +553,19 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
}
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
- nh = spf6i->fib6_nh;
- if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ bool matched = false;
+
+ if (unlikely(spf6i->nh)) {
+ nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
+ oif, flags);
+ if (nh)
+ matched = true;
+ } else {
+ nh = spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags))
+ matched = true;
+ }
+ if (matched) {
res->f6i = spf6i;
goto out;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 11/27] ipv6: Handle all fib6_nh in a nexthop in __find_rr_leaf
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (9 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 10/27] ipv6: Handle all fib6_nh in a nexthop in rt6_device_match David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 12/27] ipv6: Handle all fib6_nh in a nexthop in rt6_nlmsg_size David Ahern
` (15 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a hook in __find_rr_leaf to handle nexthop struct in a fib6_info.
nexthop_for_each_fib6_nh is used to walk each fib6_nh in a nexthop and
call find_match. On a match, use the fib6_nh saved in the callback arg
to setup fib6_result.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 47 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f15ba228beed..cf5e2ec9fb84 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -769,6 +769,24 @@ static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
return rc;
}
+struct fib6_nh_frl_arg {
+ u32 flags;
+ int oif;
+ int strict;
+ int *mpri;
+ bool *do_rr;
+ struct fib6_nh *nh;
+};
+
+static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_frl_arg *arg = _arg;
+
+ arg->nh = nh;
+ return find_match(nh, arg->flags, arg->oif, arg->strict,
+ arg->mpri, arg->do_rr);
+}
+
static void __find_rr_leaf(struct fib6_info *f6i_start,
struct fib6_info *nomatch, u32 metric,
struct fib6_result *res, struct fib6_info **cont,
@@ -779,6 +797,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
for (f6i = f6i_start;
f6i && f6i != nomatch;
f6i = rcu_dereference(f6i->fib6_next)) {
+ bool matched = false;
struct fib6_nh *nh;
if (cont && f6i->fib6_metric != metric) {
@@ -789,8 +808,34 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
if (fib6_check_expired(f6i))
continue;
- nh = f6i->fib6_nh;
- if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ if (unlikely(f6i->nh)) {
+ struct fib6_nh_frl_arg arg = {
+ .flags = f6i->fib6_flags,
+ .oif = oif,
+ .strict = strict,
+ .mpri = mpri,
+ .do_rr = do_rr
+ };
+
+ if (nexthop_is_blackhole(f6i->nh)) {
+ res->fib6_flags = RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->f6i = f6i;
+ res->nh = nexthop_fib6_nh(f6i->nh);
+ return;
+ }
+ if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
+ &arg)) {
+ matched = true;
+ nh = arg.nh;
+ }
+ } else {
+ nh = f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict,
+ mpri, do_rr))
+ matched = true;
+ }
+ if (matched) {
res->f6i = f6i;
res->nh = nh;
res->fib6_flags = f6i->fib6_flags;
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 12/27] ipv6: Handle all fib6_nh in a nexthop in rt6_nlmsg_size
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (10 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 11/27] ipv6: Handle all fib6_nh in a nexthop in __find_rr_leaf David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 13/27] ipv6: Handle all fib6_nh in a nexthop in fib6_info_uses_dev David Ahern
` (14 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a hook in rt6_nlmsg_size to handle nexthop struct in a fib6_info.
rt6_nh_nlmsg_size is used to sum the space needed for all nexthops in
the fib entry.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 49 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 37 insertions(+), 12 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf5e2ec9fb84..0c9ba144b8d0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -104,7 +104,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
int strict);
-static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static size_t rt6_nlmsg_size(struct fib6_info *f6i);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -4947,20 +4947,46 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
return ip6_route_add(&cfg, GFP_KERNEL, extack);
}
-static size_t rt6_nlmsg_size(struct fib6_info *rt)
+/* add the overhead of this fib6_nh to nexthop_len */
+static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
{
- int nexthop_len = 0;
+ int *nexthop_len = arg;
- if (rt->nh)
- nexthop_len += nla_total_size(4); /* RTA_NH_ID */
+ *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16); /* RTA_GATEWAY */
+
+ if (nh->fib_nh_lws) {
+ /* RTA_ENCAP_TYPE */
+ *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+ /* RTA_ENCAP */
+ *nexthop_len += nla_total_size(2);
+ }
- if (rt->fib6_nsiblings) {
- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
- + NLA_ALIGN(sizeof(struct rtnexthop))
- + nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
+ return 0;
+}
- nexthop_len *= rt->fib6_nsiblings;
+static size_t rt6_nlmsg_size(struct fib6_info *f6i)
+{
+ int nexthop_len;
+
+ if (f6i->nh) {
+ nexthop_len = nla_total_size(4); /* RTA_NH_ID */
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
+ &nexthop_len);
+ } else {
+ struct fib6_nh *nh = f6i->fib6_nh;
+
+ nexthop_len = 0;
+ if (f6i->fib6_nsiblings) {
+ nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + lwtunnel_get_encap_size(nh->fib_nh_lws);
+
+ nexthop_len *= f6i->fib6_nsiblings;
+ }
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
return NLMSG_ALIGN(sizeof(struct rtmsg))
@@ -4976,7 +5002,6 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
+ nexthop_len;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 13/27] ipv6: Handle all fib6_nh in a nexthop in fib6_info_uses_dev
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (11 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 12/27] ipv6: Handle all fib6_nh in a nexthop in rt6_nlmsg_size David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 14/27] ipv6: Handle all fib6_nh in a nexthop in exception handling David Ahern
` (13 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a hook in fib6_info_uses_dev to handle nexthop struct in a fib6_info.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0c9ba144b8d0..680d61b65647 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5202,9 +5202,27 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
return -EMSGSIZE;
}
+static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
+{
+ const struct net_device *dev = arg;
+
+ if (nh->fib_nh_dev == dev)
+ return 1;
+
+ return 0;
+}
+
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
+ if (f6i->nh) {
+ struct net_device *_dev = (struct net_device *)dev;
+
+ return !!nexthop_for_each_fib6_nh(f6i->nh,
+ fib6_info_nh_uses_dev,
+ _dev);
+ }
+
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 14/27] ipv6: Handle all fib6_nh in a nexthop in exception handling
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (12 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 13/27] ipv6: Handle all fib6_nh in a nexthop in fib6_info_uses_dev David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 15/27] ipv6: Handle all fib6_nh in a nexthop in __ip6_route_redirect David Ahern
` (12 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a hook in rt6_flush_exceptions, rt6_remove_exception_rt,
rt6_update_exception_stamp_rt, and rt6_age_exceptions to handle
nexthop struct in a fib6_info.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 106 insertions(+), 3 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 680d61b65647..49d58ad9f76e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1747,9 +1747,22 @@ static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
spin_unlock_bh(&rt6_exception_lock);
}
+static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
+{
+ struct fib6_info *f6i = arg;
+
+ fib6_nh_flush_exceptions(nh, f6i);
+
+ return 0;
+}
+
void rt6_flush_exceptions(struct fib6_info *f6i)
{
- fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+ if (f6i->nh)
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
+ f6i);
+ else
+ fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
}
/* Find cached rt in the hash table inside passed in rt
@@ -1836,6 +1849,24 @@ static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
return err;
}
+struct fib6_nh_excptn_arg {
+ struct rt6_info *rt;
+ int plen;
+ bool found;
+};
+
+static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_excptn_arg *arg = _arg;
+ int err;
+
+ err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
+ if (err == 0)
+ arg->found = true;
+
+ return 0;
+}
+
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
struct fib6_info *from;
@@ -1844,6 +1875,17 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
if (!from || !(rt->rt6i_flags & RTF_CACHE))
return -EINVAL;
+ if (from->nh) {
+ struct fib6_nh_excptn_arg arg = {
+ .rt = rt,
+ .plen = from->fib6_src.plen
+ };
+
+ nexthop_for_each_fib6_nh(from->nh, rt6_nh_remove_exception_rt,
+ &arg);
+ return arg.found ? 0 : -ENOENT;
+ }
+
return fib6_nh_remove_exception(from->fib6_nh,
from->fib6_src.plen, rt);
}
@@ -1874,9 +1916,33 @@ static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
rt6_ex->stamp = jiffies;
}
+struct fib6_nh_match_arg {
+ const struct net_device *dev;
+ const struct in6_addr *gw;
+ struct fib6_nh *match;
+};
+
+/* determine if fib6_nh has given device and gateway */
+static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_match_arg *arg = _arg;
+
+ if (arg->dev != nh->fib_nh_dev ||
+ (arg->gw && !nh->fib_nh_gw_family) ||
+ (!arg->gw && nh->fib_nh_gw_family) ||
+ (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
+ return 0;
+
+ arg->match = nh;
+
+ /* found a match, break the loop */
+ return 1;
+}
+
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct fib6_info *from;
+ struct fib6_nh *fib6_nh;
rcu_read_lock();
@@ -1884,7 +1950,21 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
if (!from || !(rt->rt6i_flags & RTF_CACHE))
goto unlock;
- fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
+ if (from->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = rt->dst.dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
+
+ if (!arg.match)
+ return;
+ fib6_nh = arg.match;
+ } else {
+ fib6_nh = from->fib6_nh;
+ }
+ fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -2046,11 +2126,34 @@ static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
rcu_read_unlock_bh();
}
+struct fib6_nh_age_excptn_arg {
+ struct fib6_gc_args *gc_args;
+ unsigned long now;
+};
+
+static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_age_excptn_arg *arg = _arg;
+
+ fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
+ return 0;
+}
+
void rt6_age_exceptions(struct fib6_info *f6i,
struct fib6_gc_args *gc_args,
unsigned long now)
{
- fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ if (f6i->nh) {
+ struct fib6_nh_age_excptn_arg arg = {
+ .gc_args = gc_args,
+ .now = now
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
+ &arg);
+ } else {
+ fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ }
}
/* must be called with rcu lock held */
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 15/27] ipv6: Handle all fib6_nh in a nexthop in __ip6_route_redirect
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (13 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 14/27] ipv6: Handle all fib6_nh in a nexthop in exception handling David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 16/27] ipv6: Handle all fib6_nh in a nexthop in rt6_do_redirect David Ahern
` (11 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a hook in __ip6_route_redirect to handle a nexthop struct in a
fib6_info. Use nexthop_for_each_fib6_nh and fib6_nh_redirect_match
to call ip6_redirect_nh_match for each fib6_nh looking for a match.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 39 +++++++++++++++++++++++++++++++++++----
1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49d58ad9f76e..fbbceca19e50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2800,6 +2800,21 @@ static bool ip6_redirect_nh_match(const struct fib6_result *res,
return true;
}
+struct fib6_nh_rd_arg {
+ struct fib6_result *res;
+ struct flowi6 *fl6;
+ const struct in6_addr *gw;
+ struct rt6_info **ret;
+};
+
+static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_rd_arg *arg = _arg;
+
+ arg->res->nh = nh;
+ return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2815,6 +2830,12 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
struct rt6_info *ret = NULL;
struct fib6_result res = {};
+ struct fib6_nh_rd_arg arg = {
+ .res = &res,
+ .fl6 = fl6,
+ .gw = &rdfl->gateway,
+ .ret = &ret
+ };
struct fib6_info *rt;
struct fib6_node *fn;
@@ -2839,14 +2860,24 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
restart:
for_each_fib6_node_rt_rcu(fn) {
res.f6i = rt;
- res.nh = rt->fib6_nh;
-
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
- goto out;
+ if (unlikely(rt->nh)) {
+ if (nexthop_is_blackhole(rt->nh))
+ continue;
+ /* on match, res->nh is filled in and potentially ret */
+ if (nexthop_for_each_fib6_nh(rt->nh,
+ fib6_nh_redirect_match,
+ &arg))
+ goto out;
+ } else {
+ res.nh = rt->fib6_nh;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
+ &ret))
+ goto out;
+ }
}
if (!rt)
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 16/27] ipv6: Handle all fib6_nh in a nexthop in rt6_do_redirect
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (14 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 15/27] ipv6: Handle all fib6_nh in a nexthop in __ip6_route_redirect David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 17/27] ipv6: Handle all fib6_nh in a nexthop in mtu updates David Ahern
` (10 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Use nexthop_for_each_fib6_nh and fib6_nh_find_match to find the
fib6_nh in a nexthop that correlates to the device and gateway
in the rt6_info.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fbbceca19e50..bf682ab05044 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3909,7 +3909,25 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (!res.f6i)
goto out;
- res.nh = res.f6i->fib6_nh;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev. Should be impossible
+ */
+ if (!arg.match)
+ return;
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 17/27] ipv6: Handle all fib6_nh in a nexthop in mtu updates
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (15 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 16/27] ipv6: Handle all fib6_nh in a nexthop in rt6_do_redirect David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 18/27] ipv4: Allow routes to use nexthop objects David Ahern
` (9 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Use nexthop_for_each_fib6_nh to call fib6_nh_mtu_change for each
fib6_nh in a nexthop for rt6_mtu_change_route. For __ip6_rt_update_pmtu,
we need to find the nexthop that correlates to the device and gateway
in the rt6_info.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv6/route.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bf682ab05044..d967753bc75a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2690,10 +2690,31 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_unlock();
return;
}
- res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt6->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev + gw. Should be impossible.
+ */
+ if (!arg.match) {
+ rcu_read_unlock();
+ return;
+ }
+
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
@@ -4660,6 +4681,12 @@ static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
return 0;
arg->f6i = f6i;
+ if (f6i->nh) {
+ /* fib6_nh_mtu_change only returns 0, so this is safe */
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
+ arg);
+ }
+
return fib6_nh_mtu_change(f6i->fib6_nh, arg);
}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 18/27] ipv4: Allow routes to use nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (16 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 17/27] ipv6: Handle all fib6_nh in a nexthop in mtu updates David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 19/27] ipv4: Optimization for fib_info lookup with nexthops David Ahern
` (8 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add support for RTA_NH_ID attribute to allow a user to specify a
nexthop id to use with a route. fc_nh_id is added to fib_config to
hold the value passed in the RTA_NH_ID attribute. If a nexthop id
is given, the gateway, device, encap and multipath attributes can
not be set.
Update fib_nh_match to check ids on a route delete.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip_fib.h | 1 +
net/ipv4/fib_frontend.c | 19 +++++++++++++++++++
net/ipv4/fib_semantics.c | 15 +++++++++++++++
3 files changed, 35 insertions(+)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 071d280de389..74891397f694 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -44,6 +44,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32 fc_prefsrc;
+ u32 fc_nh_id;
struct nlattr *fc_mx;
struct rtnexthop *fc_mp;
int fc_mx_len;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8e49baa00d20..f9f7209a219e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -667,6 +667,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
@@ -804,6 +805,18 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
if (err < 0)
goto errout;
break;
+ case RTA_NH_ID:
+ cfg->fc_nh_id = nla_get_u32(attr);
+ break;
+ }
+ }
+
+ if (cfg->fc_nh_id) {
+ if (cfg->fc_oif || cfg->fc_gw_family ||
+ cfg->fc_encap || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ return -EINVAL;
}
}
@@ -830,6 +843,12 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ err = -EINVAL;
+ goto errout;
+ }
+
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
NL_SET_ERR_MSG(extack, "FIB table does not exist");
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 01e587a5dcb1..08bbdf3d5173 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -793,6 +793,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
+ if (cfg->fc_nh_id) {
+ if (fi->nh && cfg->fc_nh_id == fi->nh->id)
+ return 0;
+ return 1;
+ }
+
if (cfg->fc_oif || cfg->fc_gw_family) {
struct fib_nh *nh = fib_info_nh(fi, 0);
@@ -1306,6 +1312,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
+ if (cfg->fc_nh_id) {
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto err_inval;
+ }
+ nhs = 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 19/27] ipv4: Optimization for fib_info lookup with nexthops
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (17 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 18/27] ipv4: Allow routes to use nexthop objects David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 20/27] ipv6: Allow routes to use nexthop objects David Ahern
` (7 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Be optimistic about re-using a fib_info when nexthop id is given and
the route does not use metrics. Avoids a memory allocation which in
most cases is expected to be freed anyways.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv4/fib_semantics.c | 71 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 65 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 08bbdf3d5173..d9f8c3c0fb0d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -329,14 +329,32 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}
-static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
+static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
+ u32 prefsrc, u32 priority)
+{
+ unsigned int val = init_val;
+
+ val ^= (protocol << 8) | scope;
+ val ^= prefsrc;
+ val ^= priority;
+
+ return val;
+}
+
+static unsigned int fib_info_hashfn_result(unsigned int val)
{
unsigned int mask = (fib_info_hash_size - 1);
- unsigned int val = fi->fib_nhs;
- val ^= (fi->fib_protocol << 8) | fi->fib_scope;
- val ^= (__force u32)fi->fib_prefsrc;
- val ^= fi->fib_priority;
+ return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+}
+
+static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+{
+ unsigned int val;
+
+ val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
+ fi->fib_scope, (__force u32)fi->fib_prefsrc,
+ fi->fib_priority);
if (fi->nh) {
val ^= fib_devindex_hashfn(fi->nh->id);
@@ -346,7 +364,40 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
} endfor_nexthops(fi)
}
- return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ return fib_info_hashfn_result(val);
+}
+
+/* no metrics, only nexthop id */
+static struct fib_info *fib_find_info_nh(struct net *net,
+ const struct fib_config *cfg)
+{
+ struct hlist_head *head;
+ struct fib_info *fi;
+ unsigned int hash;
+
+ hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+ cfg->fc_protocol, cfg->fc_scope,
+ (__force u32)cfg->fc_prefsrc,
+ cfg->fc_priority);
+ hash = fib_info_hashfn_result(hash);
+ head = &fib_info_hash[hash];
+
+ hlist_for_each_entry(fi, head, fib_hash) {
+ if (!net_eq(fi->fib_net, net))
+ continue;
+ if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
+ continue;
+ if (cfg->fc_protocol == fi->fib_protocol &&
+ cfg->fc_scope == fi->fib_scope &&
+ cfg->fc_prefsrc == fi->fib_prefsrc &&
+ cfg->fc_priority == fi->fib_priority &&
+ cfg->fc_type == fi->fib_type &&
+ cfg->fc_table == fi->fib_tb_id &&
+ !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK))
+ return fi;
+ }
+
+ return NULL;
}
static struct fib_info *fib_find_info(struct fib_info *nfi)
@@ -1313,6 +1364,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
if (cfg->fc_nh_id) {
+ if (!cfg->fc_mx) {
+ fi = fib_find_info_nh(net, cfg);
+ if (fi) {
+ fi->fib_treeref++;
+ return fi;
+ }
+ }
+
nh = nexthop_find_by_id(net, cfg->fc_nh_id);
if (!nh) {
NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 20/27] ipv6: Allow routes to use nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (18 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 19/27] ipv4: Optimization for fib_info lookup with nexthops David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 21/27] nexthops: add support for replace David Ahern
` (6 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add support for RTA_NH_ID attribute to allow a user to specify a
nexthop id to use with a route. fc_nh_id is added to fib6_config to
hold the value passed in the RTA_NH_ID attribute. If a nexthop id
is given, the gateway, device, encap and multipath attributes can
not be set.
Update ip6_route_del to check metric and protocol before nexthop
specs. If fc_nh_id is set, then it must match the id in the route
entry. Since IPv6 allows delete of a cached entry (an exception),
add ip6_del_cached_rt_nh to cycle through all of the fib6_nh in
a fib entry if it is using a nexthop.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip6_fib.h | 1 +
net/ipv6/route.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 82 insertions(+), 8 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a8acd51b277..ef946578341f 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -53,6 +53,7 @@ struct fib6_config {
u16 fc_delete_all_nh : 1,
fc_ignore_dev_down:1,
__unused : 14;
+ u32 fc_nh_id;
struct in6_addr fc_dst;
struct in6_addr fc_src;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d967753bc75a..29c2f5086116 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3541,6 +3541,16 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
#endif
+ if (cfg->fc_nh_id) {
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto out;
+ }
+ err = fib6_check_nexthop(nh, cfg, extack);
+ if (err)
+ goto out;
+ }
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
@@ -3772,6 +3782,30 @@ static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
return 0;
}
+struct fib6_nh_del_cached_rt_arg {
+ struct fib6_config *cfg;
+ struct fib6_info *f6i;
+};
+
+static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_del_cached_rt_arg *arg = _arg;
+ int rc;
+
+ rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
+ return rc != -ESRCH ? rc : 0;
+}
+
+static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
+{
+ struct fib6_nh_del_cached_rt_arg arg = {
+ .cfg = cfg,
+ .f6i = f6i
+ };
+
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -3797,11 +3831,20 @@ static int ip6_route_del(struct fib6_config *cfg,
for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh;
- nh = rt->fib6_nh;
- if (cfg->fc_flags & RTF_CACHE) {
- int rc;
+ if (rt->nh && rt->nh->id != cfg->fc_nh_id)
+ continue;
- rc = ip6_del_cached_rt(cfg, rt, nh);
+ if (cfg->fc_flags & RTF_CACHE) {
+ int rc = 0;
+
+ if (rt->nh) {
+ rc = ip6_del_cached_rt_nh(cfg, rt);
+ } else if (cfg->fc_nh_id) {
+ continue;
+ } else {
+ nh = rt->fib6_nh;
+ rc = ip6_del_cached_rt(cfg, rt, nh);
+ }
if (rc != -ESRCH) {
rcu_read_unlock();
return rc;
@@ -3809,6 +3852,23 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
}
+ if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
+ continue;
+ if (cfg->fc_protocol &&
+ cfg->fc_protocol != rt->fib6_protocol)
+ continue;
+
+ if (rt->nh) {
+ if (!fib6_info_hold_safe(rt))
+ continue;
+ rcu_read_unlock();
+
+ return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ }
+ if (cfg->fc_nh_id)
+ continue;
+
+ nh = rt->fib6_nh;
if (cfg->fc_ifindex &&
(!nh->fib_nh_dev ||
nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3816,10 +3876,6 @@ static int ip6_route_del(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
continue;
- if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
- continue;
- if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
- continue;
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_unlock();
@@ -4719,6 +4775,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4765,6 +4822,16 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+ if (tb[RTA_NH_ID]) {
+ if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
+ tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ goto errout;
+ }
+ cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
+ }
+
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -5099,6 +5166,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (cfg.fc_nh_id &&
+ !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ return -EINVAL;
+ }
+
if (cfg.fc_mp)
return ip6_route_multipath_del(&cfg, extack);
else {
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 21/27] nexthops: add support for replace
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (19 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 20/27] ipv6: Allow routes to use nexthop objects David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 22/27] selftests: Add test cases for nexthop objects David Ahern
` (5 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add support for atomically upating a nexthop config.
When updating a nexthop, walk the lists of associated fib entries and
verify the new config is valid. After replace, bump the sequence counters
for FIB entries to invalidate any dst entries.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
net/ipv4/nexthop.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 227 insertions(+), 17 deletions(-)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 6d1c8927e6d3..92aeff6ffe26 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -17,7 +17,7 @@
#include <net/sock.h>
static void remove_nexthop(struct net *net, struct nexthop *nh,
- struct nl_info *nlinfo);
+ bool skip_fib, struct nl_info *nlinfo);
#define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
@@ -548,6 +548,16 @@ int nexthop_for_each_fib6_nh(struct nexthop *nh,
}
EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
+static int check_src_addr(const struct in6_addr *saddr,
+ struct netlink_ext_ack *extack)
+{
+ if (!ipv6_addr_any(saddr)) {
+ NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ return -EINVAL;
+ }
+ return 0;
+}
+
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -559,10 +569,8 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
* routing it can not use nexthop objects. mlxsw also does not allow
* fib6_src on routes.
*/
- if (!ipv6_addr_any(&cfg->fc_src)) {
- NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
return -EINVAL;
- }
if (nh->is_group) {
struct nh_group *nhg;
@@ -583,6 +591,25 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
}
EXPORT_SYMBOL_GPL(fib6_check_nexthop);
+/* if existing nexthop has ipv6 routes linked to it, need
+ * to verify this new spec works with ipv6
+ */
+static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_info *f6i;
+
+ if (list_empty(&old->f6i_list))
+ return 0;
+
+ list_for_each_entry(f6i, &old->f6i_list, nh_list) {
+ if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
+ return -EINVAL;
+ }
+
+ return fib6_check_nexthop(new, NULL, extack);
+}
+
static int nexthop_check_scope(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
@@ -631,6 +658,21 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope,
return err;
}
+static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_info *fi;
+
+ list_for_each_entry(fi, &old->fi_list, nh_list) {
+ int err;
+
+ err = fib_check_nexthop(new, fi->fib_scope, extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static void nh_group_rebalance(struct nh_group *nhg)
{
int total = 0;
@@ -688,7 +730,7 @@ static void remove_nh_grp_entry(struct nh_grp_entry *nhge,
}
static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
- struct nl_info *nlinfo)
+ bool skip_fib, struct nl_info *nlinfo)
{
struct nh_grp_entry *nhge, *tmp;
@@ -701,7 +743,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
/* if this group has no more entries then remove it */
if (!nhg->num_nh)
- remove_nexthop(net, nhge->nh_parent, nlinfo);
+ remove_nexthop(net, nhge->nh_parent, skip_fib, nlinfo);
}
}
@@ -723,6 +765,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
}
}
+/* not called for nexthop replace */
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
struct fib6_info *f6i, *tmp;
@@ -745,9 +788,10 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
}
static void __remove_nexthop(struct net *net, struct nexthop *nh,
- struct nl_info *nlinfo)
+ bool skip_fib, struct nl_info *nlinfo)
{
- __remove_nexthop_fib(net, nh);
+ if (!skip_fib)
+ __remove_nexthop_fib(net, nh);
if (nh->is_group) {
remove_nexthop_group(nh, nlinfo);
@@ -758,12 +802,12 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh,
if (nhi->fib_nhc.nhc_dev)
hlist_del(&nhi->dev_hash);
- remove_nexthop_from_groups(net, nh, nlinfo);
+ remove_nexthop_from_groups(net, nh, skip_fib, nlinfo);
}
}
static void remove_nexthop(struct net *net, struct nexthop *nh,
- struct nl_info *nlinfo)
+ bool skip_fib, struct nl_info *nlinfo)
{
/* remove from the tree */
rb_erase(&nh->rb_node, &net->nexthop.rb_root);
@@ -771,16 +815,177 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
if (nlinfo)
nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
- __remove_nexthop(net, nh, nlinfo);
+ __remove_nexthop(net, nh, skip_fib, nlinfo);
nh_base_seq_inc(net);
nexthop_put(nh);
}
+/* if any FIB entries reference this nexthop, any dst entries
+ * need to be regenerated
+ */
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+{
+ struct fib6_info *f6i;
+
+ if (!list_empty(&nh->fi_list))
+ rt_cache_flush(net);
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_update_sernum(net, f6i);
+}
+
+static int replace_nexthop_grp(struct net *net, struct nexthop *old,
+ struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_group *oldg, *newg;
+ int i;
+
+ if (!new->is_group) {
+ NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
+ return -EINVAL;
+ }
+
+ oldg = rtnl_dereference(old->nh_grp);
+ newg = rtnl_dereference(new->nh_grp);
+
+ /* update parents - used by nexthop code for cleanup */
+ for (i = 0; i < newg->num_nh; ++i)
+ newg->nh_entries[i].nh_parent = old;
+
+ rcu_assign_pointer(old->nh_grp, newg);
+
+ for (i = 0; i < oldg->num_nh; ++i)
+ oldg->nh_entries[i].nh_parent = new;
+
+ rcu_assign_pointer(new->nh_grp, oldg);
+
+ return 0;
+}
+
+static int replace_nexthop_single(struct net *net, struct nexthop *old,
+ struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *oldi, *newi;
+
+ if (new->is_group) {
+ NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
+ return -EINVAL;
+ }
+
+ oldi = rtnl_dereference(old->nh_info);
+ newi = rtnl_dereference(new->nh_info);
+
+ newi->nh_parent = old;
+ oldi->nh_parent = new;
+
+ old->protocol = new->protocol;
+ old->nh_flags = new->nh_flags;
+
+ rcu_assign_pointer(old->nh_info, newi);
+ rcu_assign_pointer(new->nh_info, oldi);
+
+ return 0;
+}
+
+static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
+ struct nl_info *info)
+{
+ struct fib6_info *f6i;
+
+ if (!list_empty(&nh->fi_list)) {
+ struct fib_info *fi;
+
+ /* expectation is a few fib_info per nexthop and then
+ * a lot of routes per fib_info. So mark the fib_info
+ * and then walk the fib tables once
+ */
+ list_for_each_entry(fi, &nh->fi_list, nh_list)
+ fi->nh_updated = true;
+
+ fib_info_notify_update(net, info);
+
+ list_for_each_entry(fi, &nh->fi_list, nh_list)
+ fi->nh_updated = false;
+ }
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_rt_update(net, f6i, info);
+}
+
+/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
+ * linked to this nexthop and for all groups that the nexthop
+ * is a member of
+ */
+static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
+ struct nl_info *info)
+{
+ struct nh_grp_entry *nhge;
+
+ __nexthop_replace_notify(net, nh, info);
+
+ list_for_each_entry(nhge, &nh->grp_list, nh_list)
+ __nexthop_replace_notify(net, nhge->nh_parent, info);
+}
+
static int replace_nexthop(struct net *net, struct nexthop *old,
struct nexthop *new, struct netlink_ext_ack *extack)
{
- return -EEXIST;
+ bool new_is_reject = false;
+ struct nh_grp_entry *nhge;
+ int err;
+
+ /* check that existing FIB entries are ok with the
+ * new nexthop definition
+ */
+ err = fib_check_nh_list(old, new, extack);
+ if (err)
+ return err;
+
+ err = fib6_check_nh_list(old, new, extack);
+ if (err)
+ return err;
+
+ if (!new->is_group) {
+ struct nh_info *nhi = rtnl_dereference(new->nh_info);
+
+ new_is_reject = nhi->reject_nh;
+ }
+
+ list_for_each_entry(nhge, &old->grp_list, nh_list) {
+ /* if new nexthop is a blackhole, any groups using this
+ * nexthop cannot have more than 1 path
+ */
+ if (new_is_reject &&
+ nexthop_num_path(nhge->nh_parent) > 1) {
+ NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
+ return -EINVAL;
+ }
+
+ err = fib_check_nh_list(nhge->nh_parent, new, extack);
+ if (err)
+ return err;
+
+ err = fib6_check_nh_list(nhge->nh_parent, new, extack);
+ if (err)
+ return err;
+ }
+
+ if (old->is_group)
+ err = replace_nexthop_grp(net, old, new, extack);
+ else
+ err = replace_nexthop_single(net, old, new, extack);
+
+ if (!err) {
+ nh_rt_cache_flush(net, old);
+
+ __remove_nexthop(net, new, true, NULL);
+ nexthop_put(new);
+ }
+
+ return err;
}
/* called with rtnl_lock held */
@@ -792,6 +997,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
bool create = !!(cfg->nlflags & NLM_F_CREATE);
u32 new_id = new_nh->id;
+ int replace_notify = 0;
int rc = -EEXIST;
pp = &root->rb_node;
@@ -811,8 +1017,10 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
pp = &next->rb_right;
} else if (replace) {
rc = replace_nexthop(net, nh, new_nh, extack);
- if (!rc)
+ if (!rc) {
new_nh = nh; /* send notification with old nh */
+ replace_notify = 1;
+ }
goto out;
} else {
/* id already exists and not a replace */
@@ -833,6 +1041,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+ if (replace_notify)
+ nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
return rc;
@@ -852,7 +1062,7 @@ static void nexthop_flush_dev(struct net_device *dev)
if (nhi->fib_nhc.nhc_dev != dev)
continue;
- remove_nexthop(net, nhi->nh_parent, NULL);
+ remove_nexthop(net, nhi->nh_parent, false, NULL);
}
}
@@ -865,7 +1075,7 @@ static void flush_all_nexthops(struct net *net)
while ((node = rb_first(root))) {
nh = rb_entry(node, struct nexthop, rb_node);
- remove_nexthop(net, nh, NULL);
+ remove_nexthop(net, nh, false, NULL);
cond_resched();
}
}
@@ -1079,7 +1289,7 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
err = insert_nexthop(net, nh, cfg, extack);
if (err) {
- __remove_nexthop(net, nh, NULL);
+ __remove_nexthop(net, nh, false, NULL);
nexthop_put(nh);
nh = ERR_PTR(err);
}
@@ -1339,7 +1549,7 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!nh)
return -ENOENT;
- remove_nexthop(net, nh, &nlinfo);
+ remove_nexthop(net, nh, false, &nlinfo);
return 0;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 22/27] selftests: Add test cases for nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (20 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 21/27] nexthops: add support for replace David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 23/27] selftests: pmtu: Move running of test into a new function David Ahern
` (4 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
tools/testing/selftests/net/fib_nexthops.sh | 1026 +++++++++++++++++++++++++++
1 file changed, 1026 insertions(+)
create mode 100755 tools/testing/selftests/net/fib_nexthops.sh
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
new file mode 100755
index 000000000000..c5c93d5fb3ad
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -0,0 +1,1026 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: me | ns: peer | ns: remote
+# 2001:db8:91::1 | 2001:db8:91::2 |
+# 172.16.1.1 | 172.16.1.2 |
+# veth1 <---|---> veth2 |
+# | veth5 <--|--> veth6 172.16.101.1
+# veth3 <---|---> veth4 | 2001:db8:101::1
+# 172.16.2.1 | 172.16.2.2 |
+# 2001:db8:92::1 | 2001:db8:92::2 |
+#
+# This test is for checking IPv4 and IPv6 FIB behavior with nexthop
+# objects. Device reference counts and network namespace cleanup tested
+# by use of network namespace for peer.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+
+ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+TESTS="${ALL_TESTS}"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+nsid=100
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local ns
+ local addr
+
+ [ -n "$2" ] && ns="-netns $2"
+ addr=$(ip $ns -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+create_ns()
+{
+ local n=${1}
+
+ ip netns del ${n} 2>/dev/null
+
+ set -e
+ ip netns add ${n}
+ ip netns set ${n} $((nsid++))
+ ip -netns ${n} addr add 127.0.0.1/8 dev lo
+ ip -netns ${n} link set lo up
+
+ ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1
+ ip netns exec ${n} sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec ${n} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${n} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${n} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${n} sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec ${n} sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${n} sysctl -qw net.ipv6.conf.default.accept_dad=0
+
+ set +e
+}
+
+setup()
+{
+ cleanup
+
+ create_ns me
+ create_ns peer
+ create_ns remote
+
+ IP="ip -netns me"
+ set -e
+ $IP li add veth1 type veth peer name veth2
+ $IP li set veth1 up
+ $IP addr add 172.16.1.1/24 dev veth1
+ $IP -6 addr add 2001:db8:91::1/64 dev veth1
+
+ $IP li add veth3 type veth peer name veth4
+ $IP li set veth3 up
+ $IP addr add 172.16.2.1/24 dev veth3
+ $IP -6 addr add 2001:db8:92::1/64 dev veth3
+
+ $IP li set veth2 netns peer up
+ ip -netns peer addr add 172.16.1.2/24 dev veth2
+ ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+
+ $IP li set veth4 netns peer up
+ ip -netns peer addr add 172.16.2.2/24 dev veth4
+ ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+
+ ip -netns remote li add veth5 type veth peer name veth6
+ ip -netns remote li set veth5 up
+ ip -netns remote addr add dev veth5 172.16.101.1/24
+ ip -netns remote addr add dev veth5 2001:db8:101::1/64
+ ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
+ ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
+
+ ip -netns remote li set veth6 netns peer up
+ ip -netns peer addr add dev veth6 172.16.101.2/24
+ ip -netns peer addr add dev veth6 2001:db8:101::2/64
+ set +e
+}
+
+cleanup()
+{
+ local ns
+
+ for ns in me peer remote; do
+ ip netns del ${ns} 2>/dev/null
+ done
+}
+
+check_output()
+{
+ local out="$1"
+ local expected="$2"
+ local rc=0
+
+ [ "${out}" = "${expected}" ] && return 0
+
+ if [ -z "${out}" ]; then
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\nNo entry found\n"
+ printf "Expected:\n"
+ printf " ${expected}\n"
+ fi
+ return 1
+ fi
+
+ out=$(echo ${out})
+ if [ "${out}" != "${expected}" ]; then
+ rc=1
+ if [ "${VERBOSE}" = "1" ]; then
+ printf " Unexpected entry. Have:\n"
+ printf " ${out}\n"
+ printf " Expected:\n"
+ printf " ${expected}\n\n"
+ fi
+ fi
+
+ return $rc
+}
+
+check_nexthop()
+{
+ local nharg="$1"
+ local expected="$2"
+ local out
+
+ out=$($IP nexthop ls ${nharg} 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
+check_route()
+{
+ local pfx="$1"
+ local expected="$2"
+ local out
+
+ out=$($IP route ls match ${pfx} 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
+check_route6()
+{
+ local pfx="$1"
+ local expected="$2"
+ local out
+
+ out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
+################################################################################
+# basic operations (add, delete, replace) on nexthops and nexthop groups
+#
+# IPv6
+
+ipv6_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6"
+ echo "----------------------"
+
+ run_cmd "$IP nexthop add id 52 via 2001:db8:91::2 dev veth1"
+ rc=$?
+ log_test $rc 0 "Create nexthop with id, gw, dev"
+ if [ $rc -ne 0 ]; then
+ echo "Basic IPv6 create fails; can not continue"
+ return 1
+ fi
+
+ run_cmd "$IP nexthop get id 52"
+ log_test $? 0 "Get nexthop by id"
+ check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1"
+
+ run_cmd "$IP nexthop del id 52"
+ log_test $? 0 "Delete nexthop by id"
+ check_nexthop "id 52" ""
+
+ #
+ # gw, device spec
+ #
+ # gw validation, no device - fails since dev required
+ run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
+ log_test $? 2 "Create nexthop - gw only"
+
+ # gw is not reachable throught given dev
+ run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
+ log_test $? 2 "Create nexthop - invalid gw+dev combination"
+
+ # onlink arg overrides gw+dev lookup
+ run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1 onlink"
+ log_test $? 0 "Create nexthop - gw+dev and onlink"
+
+ # admin down should delete nexthops
+ set -e
+ run_cmd "$IP -6 nexthop add id 55 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 56 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 57 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP li set dev veth1 down"
+ set +e
+ check_nexthop "dev veth1" ""
+ log_test $? 0 "Nexthops removed on admin down"
+}
+
+ipv6_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 groups functional"
+ echo "----------------------"
+
+ # basic functionality: create a nexthop group, default weight
+ run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 group 61"
+ log_test $? 0 "Create nexthop group with single nexthop"
+
+ # get nexthop group
+ run_cmd "$IP nexthop get id 101"
+ log_test $? 0 "Get nexthop group by id"
+ check_nexthop "id 101" "id 101 group 61"
+
+ # delete nexthop group
+ run_cmd "$IP nexthop del id 101"
+ log_test $? 0 "Delete nexthop group by id"
+ check_nexthop "id 101" ""
+
+ $IP nexthop flush >/dev/null 2>&1
+ check_nexthop "id 101" ""
+
+ #
+ # create group with multiple nexthops - mix of gw and dev only
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62/63/64/65"
+ log_test $? 0 "Nexthop group with multiple nexthops"
+ check_nexthop "id 102" "id 102 group 62/63/64/65"
+
+ # Delete nexthop in a group and group is updated
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" "id 102 group 62/64/65"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+
+ # create group with multiple weighted nexthops
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 103 group 62/63,2/64,3/65,4"
+ log_test $? 0 "Nexthop group with weighted nexthops"
+ check_nexthop "id 103" "id 103 group 62/63,2/64,3/65,4"
+
+ # Delete nexthop in a weighted group and group is updated
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 103" "id 103 group 62/64,3/65,4"
+ log_test $? 0 "Weighted nexthop group updated when entry is deleted"
+
+ # admin down - nexthop is removed from group
+ run_cmd "$IP li set dev veth1 down"
+ check_nexthop "dev veth1" ""
+ log_test $? 0 "Nexthops in groups removed on admin down"
+
+ # expect groups to have been deleted as well
+ check_nexthop "" ""
+
+ run_cmd "$IP li set dev veth1 up"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ # group with nexthops using different devices
+ set -e
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+
+ run_cmd "$IP nexthop add id 72 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 73 via 2001:db8:92::3 dev veth3"
+ run_cmd "$IP nexthop add id 74 via 2001:db8:92::4 dev veth3"
+ run_cmd "$IP nexthop add id 75 via 2001:db8:92::5 dev veth3"
+ set +e
+
+ # multiple groups with same nexthop
+ run_cmd "$IP nexthop add id 104 group 62"
+ run_cmd "$IP nexthop add id 105 group 62"
+ check_nexthop "group" "id 104 group 62 id 105 group 62"
+ log_test $? 0 "Multiple groups with same nexthop"
+
+ run_cmd "$IP nexthop flush groups"
+ [ $? -ne 0 ] && return 1
+
+ # on admin down of veth1, it should be removed from the group
+ run_cmd "$IP nexthop add id 105 group 62/63/72/73/64"
+ run_cmd "$IP li set veth1 down"
+ check_nexthop "id 105" "id 105 group 72/73"
+ log_test $? 0 "Nexthops in group removed on admin down - mixed group"
+
+ run_cmd "$IP nexthop add id 106 group 105/74"
+ log_test $? 2 "Nexthop group can not have a group as an entry"
+
+ # a group can have a blackhole entry only if it is the only
+ # nexthop in the group. Needed for atomic replace with an
+ # actual nexthop group
+ run_cmd "$IP -6 nexthop add id 31 blackhole"
+ run_cmd "$IP nexthop add id 107 group 31"
+ log_test $? 0 "Nexthop group with a blackhole entry"
+
+ run_cmd "$IP nexthop add id 108 group 31/24"
+ log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+}
+
+ipv6_fcnal_runtime()
+{
+ local rc
+
+ echo
+ echo "IPv6 functional runtime"
+ echo "-----------------------"
+
+ sleep 5
+
+ #
+ # IPv6 - the basics
+ #
+ run_cmd "$IP nexthop add id 81 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ log_test $? 0 "Route add"
+
+ run_cmd "$IP ro delete 2001:db8:101::1/128 nhid 81"
+ log_test $? 0 "Route delete"
+
+ run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 0 "Ping with nexthop"
+
+ run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 122 group 81/82"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 0 "Ping - multipath"
+
+ #
+ # IPv6 with blackhole nexthops
+ #
+ run_cmd "$IP -6 nexthop add id 83 blackhole"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 2 "Ping - blackhole"
+
+ run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 0 "Ping - blackhole replaced with gateway"
+
+ run_cmd "$IP -6 nexthop replace id 83 blackhole"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 2 "Ping - gateway replaced by blackhole"
+
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ if [ $? -eq 0 ]; then
+ run_cmd "$IP nexthop replace id 122 group 83"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 2 "Ping - group with blackhole"
+
+ run_cmd "$IP nexthop replace id 122 group 81/82"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 0 "Ping - group blackhole replaced with gateways"
+ else
+ log_test 2 0 "Ping - multipath failed"
+ fi
+
+ #
+ # device only and gw + dev only mix
+ #
+ run_cmd "$IP -6 nexthop add id 85 dev veth1"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
+ log_test $? 0 "IPv6 route with device only nexthop"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1"
+
+ run_cmd "$IP nexthop add id 123 group 81/85"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
+ log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1"
+
+ #
+ # IPv6 route with v4 nexthop - not allowed
+ #
+ run_cmd "$IP ro delete 2001:db8:101::1/128"
+ run_cmd "$IP nexthop add id 84 via 172.16.1.1 dev veth1"
+ run_cmd "$IP ro add 2001:db8:101::1/128 nhid 84"
+ log_test $? 2 "IPv6 route can not have a v4 gateway"
+
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 81"
+ run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
+ log_test $? 2 "Nexthop replace - v6 route, v4 nexthop"
+
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+ run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
+ log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # weird IPv6 cases
+ #
+ run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+
+ # TO-DO:
+ # existing route with old nexthop; append route with new nexthop
+ # existing route with old nexthop; replace route with new
+ # existing route with new nexthop; replace route with old
+ # route with src address and using nexthop - not allowed
+}
+
+ipv4_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 functional"
+ echo "----------------------"
+
+ #
+ # basic IPv4 ops - add, get, delete
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ rc=$?
+ log_test $rc 0 "Create nexthop with id, gw, dev"
+ if [ $rc -ne 0 ]; then
+ echo "Basic IPv4 create fails; can not continue"
+ return 1
+ fi
+
+ run_cmd "$IP nexthop get id 12"
+ log_test $? 0 "Get nexthop by id"
+ check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link"
+
+ run_cmd "$IP nexthop del id 12"
+ log_test $? 0 "Delete nexthop by id"
+ check_nexthop "id 52" ""
+
+ #
+ # gw, device spec
+ #
+ # gw validation, no device - fails since dev is required
+ run_cmd "$IP nexthop add id 12 via 172.16.2.3"
+ log_test $? 2 "Create nexthop - gw only"
+
+ # gw not reachable through given dev
+ run_cmd "$IP nexthop add id 13 via 172.16.3.2 dev veth1"
+ log_test $? 2 "Create nexthop - invalid gw+dev combination"
+
+ # onlink flag overrides gw+dev lookup
+ run_cmd "$IP nexthop add id 13 via 172.16.3.2 dev veth1 onlink"
+ log_test $? 0 "Create nexthop - gw+dev and onlink"
+
+ # admin down should delete nexthops
+ set -e
+ run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 16 via 172.16.1.4 dev veth1"
+ run_cmd "$IP nexthop add id 17 via 172.16.1.5 dev veth1"
+ run_cmd "$IP li set dev veth1 down"
+ set +e
+ check_nexthop "dev veth1" ""
+ log_test $? 0 "Nexthops removed on admin down"
+}
+
+ipv4_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 groups functional"
+ echo "----------------------"
+
+ # basic functionality: create a nexthop group, default weight
+ run_cmd "$IP nexthop add id 11 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 group 11"
+ log_test $? 0 "Create nexthop group with single nexthop"
+
+ # get nexthop group
+ run_cmd "$IP nexthop get id 101"
+ log_test $? 0 "Get nexthop group by id"
+ check_nexthop "id 101" "id 101 group 11"
+
+ # delete nexthop group
+ run_cmd "$IP nexthop del id 101"
+ log_test $? 0 "Delete nexthop group by id"
+ check_nexthop "id 101" ""
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # create group with multiple nexthops
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 14 via 172.16.1.4 dev veth1"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.5 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12/13/14/15"
+ log_test $? 0 "Nexthop group with multiple nexthops"
+ check_nexthop "id 102" "id 102 group 12/13/14/15"
+
+ # Delete nexthop in a group and group is updated
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" "id 102 group 12/14/15"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+
+ # create group with multiple weighted nexthops
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 103 group 12/13,2/14,3/15,4"
+ log_test $? 0 "Nexthop group with weighted nexthops"
+ check_nexthop "id 103" "id 103 group 12/13,2/14,3/15,4"
+
+ # Delete nexthop in a weighted group and group is updated
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 103" "id 103 group 12/14,3/15,4"
+ log_test $? 0 "Weighted nexthop group updated when entry is deleted"
+
+ # admin down - nexthop is removed from group
+ run_cmd "$IP li set dev veth1 down"
+ check_nexthop "dev veth1" ""
+ log_test $? 0 "Nexthops in groups removed on admin down"
+
+ # expect groups to have been deleted as well
+ check_nexthop "" ""
+
+ run_cmd "$IP li set dev veth1 up"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ # group with nexthops using different devices
+ set -e
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 14 via 172.16.1.4 dev veth1"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.5 dev veth1"
+
+ run_cmd "$IP nexthop add id 22 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 23 via 172.16.2.3 dev veth3"
+ run_cmd "$IP nexthop add id 24 via 172.16.2.4 dev veth3"
+ run_cmd "$IP nexthop add id 25 via 172.16.2.5 dev veth3"
+ set +e
+
+ # multiple groups with same nexthop
+ run_cmd "$IP nexthop add id 104 group 12"
+ run_cmd "$IP nexthop add id 105 group 12"
+ check_nexthop "group" "id 104 group 12 id 105 group 12"
+ log_test $? 0 "Multiple groups with same nexthop"
+
+ run_cmd "$IP nexthop flush groups"
+ [ $? -ne 0 ] && return 1
+
+ # on admin down of veth1, it should be removed from the group
+ run_cmd "$IP nexthop add id 105 group 12/13/22/23/14"
+ run_cmd "$IP li set veth1 down"
+ check_nexthop "id 105" "id 105 group 22/23"
+ log_test $? 0 "Nexthops in group removed on admin down - mixed group"
+
+ run_cmd "$IP nexthop add id 106 group 105/24"
+ log_test $? 2 "Nexthop group can not have a group as an entry"
+
+ # a group can have a blackhole entry only if it is the only
+ # nexthop in the group. Needed for atomic replace with an
+ # actual nexthop group
+ run_cmd "$IP nexthop add id 31 blackhole"
+ run_cmd "$IP nexthop add id 107 group 31"
+ log_test $? 0 "Nexthop group with a blackhole entry"
+
+ run_cmd "$IP nexthop add id 108 group 31/24"
+ log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+}
+
+ipv4_withv6_fcnal()
+{
+ local lladdr
+
+ set -e
+ lladdr=$(get_linklocal veth2 peer)
+ run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1"
+ set +e
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
+ log_test $? 0 "IPv6 nexthop with IPv4 route"
+ check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1"
+
+ set -e
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 group 11/12"
+ set +e
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
+ log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+ check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+
+ run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+ log_test $? 0 "IPv4 route with IPv6 gateway"
+ check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1"
+
+ run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
+ log_test $? 2 "IPv4 route with invalid IPv6 gateway"
+}
+
+ipv4_fcnal_runtime()
+{
+ local lladdr
+ local rc
+
+ echo
+ echo "IPv4 functional runtime"
+ echo "-----------------------"
+
+ run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 21"
+ log_test $? 0 "Route add"
+ check_route "172.16.101.1" "172.16.101.1 nhid 21 via 172.16.1.2 dev veth1"
+
+ run_cmd "$IP ro delete 172.16.101.1/32 nhid 21"
+ log_test $? 0 "Route delete"
+
+ #
+ # scope mismatch
+ #
+ run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 22 scope host"
+ log_test $? 2 "Route add - scope conflict with nexthop"
+
+ run_cmd "$IP nexthop replace id 22 dev veth3"
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 22 scope host"
+ run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
+ log_test $? 2 "Nexthop replace with invalid scope for existing route"
+
+ #
+ # add route with nexthop and check traffic
+ #
+ run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "Basic ping"
+
+ run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 122 group 21/22"
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "Ping - multipath"
+
+ #
+ # IPv4 with blackhole nexthops
+ #
+ run_cmd "$IP nexthop add id 23 blackhole"
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 2 "Ping - blackhole"
+
+ run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "Ping - blackhole replaced with gateway"
+
+ run_cmd "$IP nexthop replace id 23 blackhole"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 2 "Ping - gateway replaced by blackhole"
+
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ if [ $? -eq 0 ]; then
+ run_cmd "$IP nexthop replace id 122 group 23"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 2 "Ping - group with blackhole"
+
+ run_cmd "$IP nexthop replace id 122 group 21/22"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "Ping - group blackhole replaced with gateways"
+ else
+ log_test 2 0 "Ping - multipath failed"
+ fi
+
+ #
+ # device only and gw + dev only mix
+ #
+ run_cmd "$IP nexthop add id 85 dev veth1"
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 85"
+ log_test $? 0 "IPv4 route with device only nexthop"
+ check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1"
+
+ run_cmd "$IP nexthop add id 122 group 21/85"
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+ log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw"
+ check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1"
+
+ #
+ # IPv4 with IPv6
+ #
+ set -e
+ lladdr=$(get_linklocal veth2 peer)
+ run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
+ set +e
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+ $IP neigh sh | grep -q "${lladdr} dev veth1"
+ if [ $? -eq 1 ]; then
+ echo " WARNING: Neigh entry missing for ${lladdr}"
+ $IP neigh sh | grep 'dev veth1'
+ fi
+
+ $IP neigh sh | grep -q "172.16.101.1 dev eth1"
+ if [ $? -eq 0 ]; then
+ echo " WARNING: Neigh entry exists for 172.16.101.1"
+ $IP neigh sh | grep 'dev veth1'
+ fi
+
+ set -e
+ run_cmd "$IP nexthop add id 25 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 group 24/25"
+ set +e
+ run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
+ log_test $? 0 "IPv4 route with mixed v4-v6 multipath route"
+
+ check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+ run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "IPv4 route with IPv6 gateway"
+
+ $IP neigh sh | grep -q "${lladdr} dev veth1"
+ if [ $? -eq 1 ]; then
+ echo " WARNING: Neigh entry missing for ${lladdr}"
+ $IP neigh sh | grep 'dev veth1'
+ fi
+
+ $IP neigh sh | grep -q "172.16.101.1 dev eth1"
+ if [ $? -eq 0 ]; then
+ echo " WARNING: Neigh entry exists for 172.16.101.1"
+ $IP neigh sh | grep 'dev veth1'
+ fi
+
+ #
+ # MPLS as an example of LWT encap
+ #
+ run_cmd "$IP nexthop add id 51 encap mpls 101 via 172.16.1.2 dev veth1"
+ log_test $? 0 "IPv4 route with MPLS encap"
+ check_nexthop "id 51" "id 51 encap mpls 101 via 172.16.1.2 dev veth1 scope link"
+ log_test $? 0 "IPv4 route with MPLS encap - check"
+
+ run_cmd "$IP nexthop add id 52 encap mpls 102 via inet6 2001:db8:91::2 dev veth1"
+ log_test $? 0 "IPv4 route with MPLS encap and v6 gateway"
+ check_nexthop "id 52" "id 52 encap mpls 102 via 2001:db8:91::2 dev veth1 scope link"
+ log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
+}
+
+basic()
+{
+ echo
+ echo "Basic functional tests"
+ echo "----------------------"
+ run_cmd "$IP nexthop ls"
+ log_test $? 0 "List with nothing defined"
+
+ run_cmd "$IP nexthop get id 1"
+ log_test $? 2 "Nexthop get on non-existent id"
+
+ # attempt to create nh without a device or gw - fails
+ run_cmd "$IP nexthop add id 1"
+ log_test $? 2 "Nexthop with no device or gateway"
+
+ # attempt to create nh with down device - fails
+ $IP li set veth1 down
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ log_test $? 2 "Nexthop with down device"
+
+ # create nh with linkdown device - fails
+ $IP li set veth1 up
+ ip -netns peer li set veth2 down
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ log_test $? 2 "Nexthop with device that is linkdown"
+ ip -netns peer li set veth2 up
+
+ # device only
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ log_test $? 0 "Nexthop with device only"
+
+ # create nh with duplicate id
+ run_cmd "$IP nexthop add id 1 dev veth3"
+ log_test $? 2 "Nexthop with duplicate id"
+
+ # blackhole nexthop
+ run_cmd "$IP nexthop add id 2 blackhole"
+ log_test $? 0 "Blackhole nexthop"
+
+ # blackhole nexthop can not have other specs
+ run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
+ log_test $? 2 "Blackhole nexthop with other attributes"
+
+ #
+ # groups
+ #
+
+ run_cmd "$IP nexthop add id 101 group 1"
+ log_test $? 0 "Create group"
+
+ run_cmd "$IP nexthop add id 102 group 2"
+ log_test $? 0 "Create group with blackhole nexthop"
+
+ # multipath group can not have a blackhole as 1 path
+ run_cmd "$IP nexthop add id 103 group 1/2"
+ log_test $? 2 "Create multipath group where 1 path is a blackhole"
+
+ # multipath group can not have a member replaced by a blackhole
+ run_cmd "$IP nexthop replace id 2 dev veth3"
+ run_cmd "$IP nexthop replace id 102 group 1/2"
+ run_cmd "$IP nexthop replace id 2 blackhole"
+ log_test $? 2 "Multipath group can not have a member replaced by blackhole"
+
+ # attempt to create group with non-existent nexthop
+ run_cmd "$IP nexthop add id 103 group 12"
+ log_test $? 2 "Create group with non-existent nexthop"
+
+ # attempt to create group with same nexthop
+ run_cmd "$IP nexthop add id 103 group 1/1"
+ log_test $? 2 "Create group with same nexthop multiple times"
+
+ # replace nexthop with a group - fails
+ run_cmd "$IP nexthop replace id 2 group 1"
+ log_test $? 2 "Replace nexthop with nexthop group"
+
+ # replace nexthop group with a nexthop - fails
+ run_cmd "$IP nexthop replace id 101 dev veth1"
+ log_test $? 2 "Replace nexthop group with nexthop"
+
+ # nexthop group with other attributes fail
+ run_cmd "$IP nexthop add id 104 group 1 dev veth1"
+ log_test $? 2 "Nexthop group and device"
+
+ run_cmd "$IP nexthop add id 104 group 1 blackhole"
+ log_test $? 2 "Nexthop group and blackhole"
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $ALL_TESTS)
+ -4 IPv4 tests only
+ -6 IPv6 tests only
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+
+ Runtime test
+ -n num Number of nexthops to target
+ -N Use new style to install routes in DUT
+
+done
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pP46hv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ 4) TESTS=${IPV4_TESTS};;
+ 6) TESTS=${IPV6_TESTS};;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip help 2>&1 | grep -q nexthop
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing nexthop command"
+ exit $ksft_skip
+fi
+
+out=$(ip nexthop ls 2>&1 | grep -q "Operation not supported")
+if [ $? -eq 0 ]; then
+ echo "SKIP: kernel lacks nexthop support"
+ exit $ksft_skip
+fi
+
+for t in $TESTS
+do
+ case $t in
+ none) IP="ip -netns peer"; setup; exit 0;;
+ *) setup; $t; cleanup;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 23/27] selftests: pmtu: Move running of test into a new function
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (21 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 22/27] selftests: Add test cases for nexthop objects David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 24/27] selftests: pmtu: Move route installs to " David Ahern
` (3 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Move the block of code that runs a test and prints the verdict to a
new function, run_test.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
tools/testing/selftests/net/pmtu.sh | 63 +++++++++++++++++++++----------------
1 file changed, 36 insertions(+), 27 deletions(-)
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 4a1275990d7e..3d6b21c4b1db 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1090,6 +1090,41 @@ test_cleanup_ipv4_exception() {
test_cleanup_vxlanX_exception 4
}
+run_test() {
+ (
+ tname="$1"
+ tdesc="$2"
+
+ unset IFS
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n##########################################################################\n\n"
+ fi
+
+ eval test_${tname}
+ ret=$?
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${tdesc}"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${tdesc}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "Pausing. Hit enter to continue"
+ read a
+ fi
+ err_flush
+ exit 1
+ elif [ $ret -eq 2 ]; then
+ printf "TEST: %-60s [SKIP]\n" "${tdesc}"
+ err_flush
+ fi
+
+ return $ret
+ )
+ [ $? -ne 0 ] && exitcode=1
+}
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -1147,33 +1182,7 @@ for t in ${tests}; do
done
[ $run_this -eq 0 ] && continue
- (
- unset IFS
-
- if [ "$VERBOSE" = "1" ]; then
- printf "\n##########################################################################\n\n"
- fi
-
- eval test_${name}
- ret=$?
-
- if [ $ret -eq 0 ]; then
- printf "TEST: %-60s [ OK ]\n" "${t}"
- elif [ $ret -eq 1 ]; then
- printf "TEST: %-60s [FAIL]\n" "${t}"
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo
- echo "Pausing. Hit enter to continue"
- read a
- fi
- err_flush
- exit 1
- elif [ $ret -eq 2 ]; then
- printf "TEST: %-60s [SKIP]\n" "${t}"
- err_flush
- fi
- )
- [ $? -ne 0 ] && exitcode=1
+ run_test "${name}" "${t}"
done
exit ${exitcode}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 24/27] selftests: pmtu: Move route installs to a new function
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (22 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 23/27] selftests: pmtu: Move running of test into a new function David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 25/27] selftests: pmtu: Add support for routing via nexthop objects David Ahern
` (2 subsequent siblings)
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Move the route add commands to a new function called setup_routing_old.
The '_old' refers to the classic way of installing routes.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
tools/testing/selftests/net/pmtu.sh | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 3d6b21c4b1db..14ffcf490032 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -448,6 +448,20 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_routing_old() {
+ for i in ${routes}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${addr}" = "" ] && addr="${i}" && continue
+ [ "${gw}" = "" ] && gw="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} route add ${addr} via ${gw}
+
+ ns=""; addr=""; gw=""
+ done
+}
+
setup_routing() {
for i in ${NS_R1} ${NS_R2}; do
ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
@@ -478,17 +492,7 @@ setup_routing() {
ns=""; peer=""; segment=""
done
- for i in ${routes}; do
- [ "${ns}" = "" ] && ns="${i}" && continue
- [ "${addr}" = "" ] && addr="${i}" && continue
- [ "${gw}" = "" ] && gw="${i}"
-
- ns_name="$(nsname ${ns})"
-
- ip -n ${ns_name} route add ${addr} via ${gw}
-
- ns=""; addr=""; gw=""
- done
+ setup_routing_old
}
setup() {
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 25/27] selftests: pmtu: Add support for routing via nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (23 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 24/27] selftests: pmtu: Move route installs to " David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 26/27] selftests: icmp_redirect: " David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 27/27] selftests: Add version of router_multipath.sh using " David Ahern
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add routing setup using nexthop objects and repeat tests with
old and new routing.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
tools/testing/selftests/net/pmtu.sh | 158 ++++++++++++++++++++++++++++--------
1 file changed, 126 insertions(+), 32 deletions(-)
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 14ffcf490032..9e6d8b704186 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -123,34 +123,35 @@ TRACING=0
# Some systems don't have a ping6 binary anymore
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+# Name Description re-run with nh
tests="
- pmtu_ipv4_exception ipv4: PMTU exceptions
- pmtu_ipv6_exception ipv6: PMTU exceptions
- pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions
- pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions
- pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions
- pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions
- pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions
- pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions
- pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions
- pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions
- pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions
- pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions
- pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions
- pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions
- pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions
- pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions
- pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions
- pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions
- pmtu_vti6_exception vti6: PMTU exceptions
- pmtu_vti4_exception vti4: PMTU exceptions
- pmtu_vti4_default_mtu vti4: default MTU assignment
- pmtu_vti6_default_mtu vti6: default MTU assignment
- pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
- pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
- pmtu_vti6_link_change_mtu vti6: MTU changes on link changes
- cleanup_ipv4_exception ipv4: cleanup of cached exceptions
- cleanup_ipv6_exception ipv6: cleanup of cached exceptions"
+ pmtu_ipv4_exception ipv4: PMTU exceptions 1
+ pmtu_ipv6_exception ipv6: PMTU exceptions 1
+ pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
+ pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
+ pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
+ pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions 1
+ pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions 1
+ pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
+ pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
+ pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
+ pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
+ pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
+ pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
+ pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions 1
+ pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions 1
+ pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1
+ pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1
+ pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1
+ pmtu_vti6_exception vti6: PMTU exceptions 0
+ pmtu_vti4_exception vti4: PMTU exceptions 0
+ pmtu_vti4_default_mtu vti4: default MTU assignment 0
+ pmtu_vti6_default_mtu vti6: default MTU assignment 0
+ pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
+ pmtu_vti6_link_add_mtu vti6: MTU setting on link creation 0
+ pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0
+ cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
+ cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1"
NS_A="ns-A"
NS_B="ns-B"
@@ -194,6 +195,30 @@ routes="
B default ${prefix6}:${b_r1}::2
"
+USE_NH="no"
+# ns family nh id destination gateway
+nexthops="
+ A 4 41 ${prefix4}.${a_r1}.2 veth_A-R1
+ A 4 42 ${prefix4}.${a_r2}.2 veth_A-R2
+ B 4 41 ${prefix4}.${b_r1}.2 veth_B-R1
+
+ A 6 61 ${prefix6}:${a_r1}::2 veth_A-R1
+ A 6 62 ${prefix6}:${a_r2}::2 veth_A-R2
+ B 6 61 ${prefix6}:${b_r1}::2 veth_B-R1
+"
+
+# nexthop id correlates to id in nexthops config above
+# ns family prefix nh id
+routes_nh="
+ A 4 default 41
+ A 4 ${prefix4}.${b_r2}.1 42
+ B 4 default 41
+
+ A 6 default 61
+ A 6 ${prefix6}:${b_r2}::1 62
+ B 6 default 61
+"
+
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
@@ -462,6 +487,36 @@ setup_routing_old() {
done
}
+setup_routing_new() {
+ for i in ${nexthops}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${fam}" = "" ] && fam="${i}" && continue
+ [ "${nhid}" = "" ] && nhid="${i}" && continue
+ [ "${gw}" = "" ] && gw="${i}" && continue
+ [ "${dev}" = "" ] && dev="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}
+
+ ns=""; fam=""; nhid=""; gw=""; dev=""
+
+ done
+
+ for i in ${routes_nh}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${fam}" = "" ] && fam="${i}" && continue
+ [ "${addr}" = "" ] && addr="${i}" && continue
+ [ "${nhid}" = "" ] && nhid="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+
+ ns=""; fam=""; addr=""; nhid=""
+ done
+}
+
setup_routing() {
for i in ${NS_R1} ${NS_R2}; do
ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
@@ -492,7 +547,13 @@ setup_routing() {
ns=""; peer=""; segment=""
done
- setup_routing_old
+ if [ "$USE_NH" = "yes" ]; then
+ setup_routing_new
+ else
+ setup_routing_old
+ fi
+
+ return 0
}
setup() {
@@ -1126,7 +1187,19 @@ run_test() {
return $ret
)
- [ $? -ne 0 ] && exitcode=1
+ ret=$?
+ [ $ret -ne 0 ] && exitcode=1
+
+ return $ret
+}
+
+run_test_nh() {
+ tname="$1"
+ tdesc="$2"
+
+ USE_NH=yes
+ run_test "${tname}" "${tdesc} - nexthop objects"
+ USE_NH=no
}
usage() {
@@ -1175,8 +1248,20 @@ trap cleanup EXIT
# start clean
cleanup
+HAVE_NH=no
+ip nexthop ls >/dev/null 2>&1
+[ $? -eq 0 ] && HAVE_NH=yes
+
+name=""
+desc=""
+rerun_nh=0
for t in ${tests}; do
- [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+ [ "${name}" = "" ] && name="${t}" && continue
+ [ "${desc}" = "" ] && desc="${t}" && continue
+
+ if [ "${HAVE_NH}" = "yes" ]; then
+ rerun_nh="${t}"
+ fi
run_this=1
for arg do
@@ -1184,9 +1269,18 @@ for t in ${tests}; do
[ "${arg}" = "${name}" ] && run_this=1 && break
run_this=0
done
- [ $run_this -eq 0 ] && continue
+ if [ $run_this -eq 1 ]; then
+ run_test "${name}" "${desc}"
+ # if test was skipped no need to retry with nexthop objects
+ [ $? -eq 2 ] && rerun_nh=0
- run_test "${name}" "${t}"
+ if [ "${rerun_nh}" = "1" ]; then
+ run_test_nh "${name}" "${desc}"
+ fi
+ fi
+ name=""
+ desc=""
+ rerun_nh=0
done
exit ${exitcode}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 26/27] selftests: icmp_redirect: Add support for routing via nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (24 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 25/27] selftests: pmtu: Add support for routing via nexthop objects David Ahern
@ 2019-06-01 3:36 ` David Ahern
2019-06-01 3:36 ` [PATCH RFC net-next 27/27] selftests: Add version of router_multipath.sh using " David Ahern
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a second pass to icmp_redirect.sh to use nexthop objects for
routes.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
tools/testing/selftests/net/icmp_redirect.sh | 60 ++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 76a7c4472dc3..ecb661e09acf 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -331,6 +331,38 @@ run_ping()
run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
}
+replace_route_new()
+{
+ # r1 to h2 via r2 and eth0
+ run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+}
+
+reset_route_new()
+{
+ run_cmd ip -netns r1 nexthop flush
+ run_cmd ip -netns h1 nexthop flush
+
+ initial_route_new
+}
+
+initial_route_new()
+{
+ # r1 to h2 via r2 and eth1
+ run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+
+ run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+ run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+
+ # h1 to h2 via r1
+ run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+ run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+
+ run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+ run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+}
+
replace_route_legacy()
{
# r1 to h2 via r2 and eth0
@@ -349,6 +381,17 @@ reset_route_legacy()
initial_route_legacy
}
+reset_route_legacy()
+{
+ run_cmd ip -netns r1 ro del ${H2_N2}
+ run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+
+ run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2}
+ run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+
+ initial_route_legacy
+}
+
initial_route_legacy()
{
# r1 to h2 via r2 and eth1
@@ -479,6 +522,23 @@ WITH_VRF=yes
setup
do_test "legacy"
+cleanup
+log_section "Routing with nexthop objects"
+ip nexthop ls >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ WITH_VRF=no
+ setup
+ do_test "new"
+
+ cleanup
+ log_section "Routing with nexthop objects and VRF"
+ WITH_VRF=yes
+ setup
+ do_test "new"
+else
+ echo "Nexthop objects not supported; skipping tests"
+fi
+
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread* [PATCH RFC net-next 27/27] selftests: Add version of router_multipath.sh using nexthop objects
2019-06-01 3:35 [PATCH RFC net-next 00/27] nexthops: Final patches David Ahern
` (25 preceding siblings ...)
2019-06-01 3:36 ` [PATCH RFC net-next 26/27] selftests: icmp_redirect: " David Ahern
@ 2019-06-01 3:36 ` David Ahern
26 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2019-06-01 3:36 UTC (permalink / raw)
To: davem, netdev; +Cc: alexei.starovoitov, David Ahern
From: David Ahern <dsahern@gmail.com>
Add a version of router_multipath.sh that uses nexthop objects for
routes. Ido requested a version that does not cause regressions with
their testing since mlxsw does not support nexthop objects yet.
Signed-off-by: David Ahern <dsahern@gmail.com>
---
.../selftests/net/forwarding/router_mpath_nh.sh | 370 +++++++++++++++++++++
1 file changed, 370 insertions(+)
create mode 100755 tools/testing/selftests/net/forwarding/router_mpath_nh.sh
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
new file mode 100755
index 000000000000..4bd356e574d9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -0,0 +1,370 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+ # h1
+ ip nexthop add id 14 via 192.0.2.1 dev $h1
+ ip route add 198.51.100.0/24 vrf vrf-h1 nhid 14
+
+ ip nexthop add id 16 via 2001:db8:1::1 dev $h1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nhid 16
+
+ # h2
+ ip nexthop add id 24 via 198.51.100.1 dev $h2
+ ip route add 192.0.2.0/24 vrf vrf-h2 nhid 24
+
+ ip nexthop add id 26 via 2001:db8:2::1 dev $h2
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nhid 26
+
+ # router 1
+ ip nexthop add id 101 via 169.254.2.22 dev $rp12
+ ip nexthop add id 102 via 169.254.3.23 dev $rp13
+ ip nexthop add id 103 group 101/102
+ ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+ ip nexthop add id 104 via fe80:2::22 dev $rp12
+ ip nexthop add id 105 via fe80:3::23 dev $rp13
+ ip nexthop add id 106 group 104/105
+ ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 104
+
+ # router 2
+ ip nexthop add id 201 via 169.254.2.12 dev $rp22
+ ip nexthop add id 202 via 169.254.3.13 dev $rp23
+ ip nexthop add id 203 group 201/202
+ ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+ ip nexthop add id 204 via fe80:2::12 dev $rp22
+ ip nexthop add id 205 via fe80:3::13 dev $rp23
+ ip nexthop add id 206 group 204/205
+ ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ routing_nh_obj
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ trap cleanup EXIT
+
+ cleanup
+ setup_prepare
+ setup_wait
+
+ tests_run
+else
+ echo "Nexthop objects not supported; skipping tests"
+fi
+
+exit $EXIT_STATUS
--
2.11.0
^ permalink raw reply related [flat|nested] 28+ messages in thread