* [PATCH net-next 07/10] selftests: forwarding: Add blackhole nexthops tests
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>
From: Ido Schimmel <idosch@nvidia.com>
Test that IPv4 and IPv6 ping fail when the route is using a blackhole
nexthop or a group with a blackhole nexthop. Test that ping passes when
the route starts using a valid nexthop.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
.../net/forwarding/router_mpath_nh.sh | 58 ++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index e8c2573d5232..388e4492b81b 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,7 +1,13 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_test
+ ping_ipv4_blackhole
+ ping_ipv6_blackhole
+"
NUM_NETIFS=8
source lib.sh
@@ -302,6 +308,56 @@ multipath_test()
multipath6_l4_test "Weighted MP 11:45" 11 45
}
+ping_ipv4_blackhole()
+{
+ RET=0
+
+ ip nexthop add id 1001 blackhole
+ ip nexthop add id 1002 group 1001
+
+ ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
+ ping_do $h1 198.51.100.2
+ check_fail $? "ping did not fail when using a blackhole nexthop"
+
+ ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
+ ping_do $h1 198.51.100.2
+ check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+ ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
+ ping_do $h1 198.51.100.2
+ check_err $? "ping failed with a valid nexthop"
+
+ log_test "IPv4 blackhole ping"
+
+ ip nexthop del id 1002
+ ip nexthop del id 1001
+}
+
+ping_ipv6_blackhole()
+{
+ RET=0
+
+ ip -6 nexthop add id 1001 blackhole
+ ip nexthop add id 1002 group 1001
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
+ ping6_do $h1 2001:db8:2::2
+ check_fail $? "ping did not fail when using a blackhole nexthop"
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
+ ping6_do $h1 2001:db8:2::2
+ check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
+ ping6_do $h1 2001:db8:2::2
+ check_err $? "ping failed with a valid nexthop"
+
+ log_test "IPv6 blackhole ping"
+
+ ip nexthop del id 1002
+ ip -6 nexthop del id 1001
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
--
2.28.0
^ permalink raw reply related
* [PATCH net-next 05/10] mlxsw: spectrum_router: Add support for blackhole nexthops
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>
From: Ido Schimmel <idosch@nvidia.com>
Add support for blackhole nexthops by programming them to the adjacency
table with a discard action.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
.../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 9 ++--
.../ethernet/mellanox/mlxsw/spectrum_router.c | 48 ++++++++++++++++---
.../ethernet/mellanox/mlxsw/spectrum_router.h | 1 +
3 files changed, 48 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index daf029931b5f..ed81d4fa48ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -913,7 +913,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
if (mlxsw_sp_nexthop_offload(nh) &&
- !mlxsw_sp_nexthop_group_has_ipip(nh))
+ !mlxsw_sp_nexthop_group_has_ipip(nh) &&
+ !mlxsw_sp_nexthop_is_discard(nh))
size++;
return size;
}
@@ -1105,7 +1106,8 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
nh_count = 0;
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_offload(nh) ||
- mlxsw_sp_nexthop_group_has_ipip(nh))
+ mlxsw_sp_nexthop_group_has_ipip(nh) ||
+ mlxsw_sp_nexthop_is_discard(nh))
continue;
if (nh_count < nh_skip)
@@ -1186,7 +1188,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_offload(nh) ||
- mlxsw_sp_nexthop_group_has_ipip(nh))
+ mlxsw_sp_nexthop_group_has_ipip(nh) ||
+ mlxsw_sp_nexthop_is_discard(nh))
continue;
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index ef0e4e452f47..d551e9bc373c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2858,9 +2858,10 @@ struct mlxsw_sp_nexthop {
offloaded:1, /* set in case the neigh is actually put into
* KVD linear area of this group.
*/
- update:1; /* set indicates that MAC of this neigh should be
+ update:1, /* set indicates that MAC of this neigh should be
* updated in HW
*/
+ discard:1; /* nexthop is programmed to discard packets */
enum mlxsw_sp_nexthop_type type;
union {
struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -3011,6 +3012,11 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
return false;
}
+bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
+{
+ return nh->discard;
+}
+
struct mlxsw_sp_nexthop_group_cmp_arg {
enum mlxsw_sp_nexthop_group_type type;
union {
@@ -3285,7 +3291,11 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, MLXSW_REG_RATR_TYPE_ETHERNET,
adj_index, nh->rif->rif_index);
- mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+ if (nh->discard)
+ mlxsw_reg_ratr_trap_action_set(ratr_pl,
+ MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
+ else
+ mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
if (nh->counter_valid)
mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
else
@@ -4128,9 +4138,7 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
{
int err = -EINVAL;
- if (nh->is_reject)
- NL_SET_ERR_MSG_MOD(extack, "Blackhole nexthops are not supported");
- else if (nh->is_fdb)
+ if (nh->is_fdb)
NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
else if (nh->has_encap)
NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
@@ -4165,7 +4173,7 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
/* Device only nexthops with an IPIP device are programmed as
* encapsulating adjacency entries.
*/
- if (!nh->gw_family &&
+ if (!nh->gw_family && !nh->is_reject &&
!mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
return -EINVAL;
@@ -4199,10 +4207,31 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
return true;
dev = info->nh->dev;
- return info->nh->gw_family ||
+ return info->nh->gw_family || info->nh->is_reject ||
mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
+static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
+
+ nh->discard = 1;
+ nh->should_offload = 1;
+ /* While nexthops that discard packets do not forward packets
+ * via an egress RIF, they still need to be programmed using a
+ * valid RIF, so use the loopback RIF created during init.
+ */
+ nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
+}
+
+static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ nh->rif = NULL;
+ nh->should_offload = 0;
+}
+
static int
mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
@@ -4236,6 +4265,9 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_type_init;
+ if (nh_obj->is_reject)
+ mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
+
return 0;
err_type_init:
@@ -4247,6 +4279,8 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
+ if (nh->discard)
+ mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index f9a59d454e28..96d8bf7a9a67 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -201,6 +201,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
u32 *p_adj_size, u32 *p_adj_hash_index);
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh);
#define mlxsw_sp_nexthop_for_each(nh, router) \
for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \
nh = mlxsw_sp_nexthop_next(router, nh))
--
2.28.0
^ permalink raw reply related
* [PATCH net-next 02/10] mlxsw: spectrum_router: Use different trap identifier for unresolved nexthops
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>
From: Ido Schimmel <idosch@nvidia.com>
Unresolved nexthops are currently written to the adjacency table with a
discard action. Packets hitting such entries are trapped to the CPU via
the 'DISCARD_ROUTER3' trap which can be enabled or disabled on demand,
but is always enabled in order to ensure the kernel can resolve the
unresolved neighbours.
This trap will be needed for blackhole nexthops support. Therefore, move
unresolved nexthops to explicitly program the adjacency entries with a
trap action and a different trap identifier, 'RTR_EGRESS0'.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++-
drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 2 +-
drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 +
3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index c61751e67750..84ed068d17f8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5008,11 +5008,12 @@ static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
if (err)
return err;
- trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
+ trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
MLXSW_REG_RATR_TYPE_ETHERNET,
mlxsw_sp->router->adj_discard_index, rif_index);
mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
+ mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
if (err)
goto err_ratr_write;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 433f14ade464..8fd7d858f3c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -617,7 +617,7 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
TRAP_TO_CPU),
MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_EXCEPTIONS,
TRAP_TO_CPU),
- MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_EXCEPTIONS,
+ MLXSW_SP_RXL_EXCEPTION(RTR_EGRESS0, L3_EXCEPTIONS,
TRAP_EXCEPTION_TO_CPU),
},
},
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 57f9e24602d0..9e070ab3ed76 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -52,6 +52,7 @@ enum {
MLXSW_TRAP_ID_RTR_INGRESS1 = 0x71,
MLXSW_TRAP_ID_IPV6_PIM = 0x79,
MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
+ MLXSW_TRAP_ID_RTR_EGRESS0 = 0x80,
MLXSW_TRAP_ID_IPV4_BGP = 0x88,
MLXSW_TRAP_ID_IPV6_BGP = 0x89,
MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
--
2.28.0
^ permalink raw reply related
* [PATCH net-next 03/10] mlxsw: spectrum_router: Use loopback RIF for unresolved nexthops
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>
From: Ido Schimmel <idosch@nvidia.com>
Now that the driver creates a loopback RIF during its initialization, it
can be used to program the adjacency entries for unresolved nexthops
instead of other RIFs. The loopback RIF is guaranteed to exist for the
entire life time of the driver, unlike other RIFs that come and go.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 84ed068d17f8..53d04e7993f6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4994,7 +4994,7 @@ int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
return err;
}
-static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
+static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp)
{
enum mlxsw_reg_ratr_trap_action trap_action;
char ratr_pl[MLXSW_REG_RATR_LEN];
@@ -5011,7 +5011,8 @@ static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
MLXSW_REG_RATR_TYPE_ETHERNET,
- mlxsw_sp->router->adj_discard_index, rif_index);
+ mlxsw_sp->router->adj_discard_index,
+ mlxsw_sp->router->lb_rif_index);
mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
@@ -5051,8 +5052,7 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
adjacency_index = nhgi->adj_index;
ecmp_size = nhgi->ecmp_size;
} else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
- err = mlxsw_sp_adj_discard_write(mlxsw_sp,
- nhgi->nh_rif->rif_index);
+ err = mlxsw_sp_adj_discard_write(mlxsw_sp);
if (err)
return err;
trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
--
2.28.0
^ permalink raw reply related
* [PATCH net-next 04/10] mlxsw: spectrum_router: Resolve RIF from nexthop struct instead of neighbour
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>
From: Ido Schimmel <idosch@nvidia.com>
The two are the same, but for blackhole nexthops we will not have an
associated neighbour struct, so resolve the RIF from the nexthop struct
itself instead.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 53d04e7993f6..ef0e4e452f47 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3284,7 +3284,7 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, MLXSW_REG_RATR_TYPE_ETHERNET,
- adj_index, neigh_entry->rif);
+ adj_index, nh->rif->rif_index);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
if (nh->counter_valid)
mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
--
2.28.0
^ permalink raw reply related
* [PATCH net-next 01/10] mlxsw: spectrum_router: Create loopback RIF during initialization
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>
From: Ido Schimmel <idosch@nvidia.com>
Up until now RIFs (router interfaces) were created on demand (e.g.,
when an IP address was added to a netdev). However, sometimes the device
needs to be provided with a RIF when one might not be available.
For example, adjacency entries that drop packets need to be programmed
with an egress RIF despite the RIF not being used to forward packets.
Create such a RIF during initialization so that it could be used later
on to support blackhole nexthops.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
.../ethernet/mellanox/mlxsw/spectrum_router.c | 31 +++++++++++++++++++
.../ethernet/mellanox/mlxsw/spectrum_router.h | 1 +
2 files changed, 32 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 42a7bec3fd88..c61751e67750 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -8918,6 +8918,30 @@ static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
kfree(router->ll_op_ctx);
}
+static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
+{
+ u16 lb_rif_index;
+ int err;
+
+ /* Create a generic loopback RIF associated with the main table
+ * (default VRF). Any table can be used, but the main table exists
+ * anyway, so we do not waste resources.
+ */
+ err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
+ &lb_rif_index);
+ if (err)
+ return err;
+
+ mlxsw_sp->router->lb_rif_index = lb_rif_index;
+
+ return 0;
+}
+
+static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
+}
+
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
struct netlink_ext_ack *extack)
{
@@ -8974,6 +8998,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_vrs_init;
+ err = mlxsw_sp_lb_rif_init(mlxsw_sp);
+ if (err)
+ goto err_lb_rif_init;
+
err = mlxsw_sp_neigh_init(mlxsw_sp);
if (err)
goto err_neigh_init;
@@ -9039,6 +9067,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
err_mp_hash_init:
mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
+ mlxsw_sp_lb_rif_fini(mlxsw_sp);
+err_lb_rif_init:
mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
mlxsw_sp_mr_fini(mlxsw_sp);
@@ -9074,6 +9104,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_core_flush_owq();
WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
mlxsw_sp_neigh_fini(mlxsw_sp);
+ mlxsw_sp_lb_rif_fini(mlxsw_sp);
mlxsw_sp_vrs_fini(mlxsw_sp);
mlxsw_sp_mr_fini(mlxsw_sp);
mlxsw_sp_lpm_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 023f70827db0..f9a59d454e28 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -75,6 +75,7 @@ struct mlxsw_sp_router {
/* One set of ops for each protocol: IPv4 and IPv6 */
const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX];
struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx;
+ u16 lb_rif_index;
};
struct mlxsw_sp_fib_entry_priv {
--
2.28.0
^ permalink raw reply related
* [PATCH net-next 00/10] mlxsw: Add support for blackhole nexthops
From: Ido Schimmel @ 2020-11-23 7:12 UTC (permalink / raw)
To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
From: Ido Schimmel <idosch@nvidia.com>
This patch set adds support for blackhole nexthops in mlxsw. These
nexthops are exactly the same as other nexthops, but instead of
forwarding packets to an egress router interface (RIF), they are
programmed to silently drop them.
Patches #1-#4 are preparations.
Patch #5 adds support for blackhole nexthops and removes the check that
prevented them from being programmed.
Patch #6 adds a selftests over mlxsw which tests that blackhole nexthops
can be programmed and are marked as offloaded.
Patch #7 extends the existing nexthop forwarding test to also test
blackhole functionality.
Patches #8-#10 add support for a new packet trap ('blackhole_nexthop')
which should be triggered whenever packets are dropped by a blackhole
nexthop. Obviously, by default, the trap action is set to 'drop' so that
dropped packets will not be reported.
Ido Schimmel (10):
mlxsw: spectrum_router: Create loopback RIF during initialization
mlxsw: spectrum_router: Use different trap identifier for unresolved
nexthops
mlxsw: spectrum_router: Use loopback RIF for unresolved nexthops
mlxsw: spectrum_router: Resolve RIF from nexthop struct instead of
neighbour
mlxsw: spectrum_router: Add support for blackhole nexthops
selftests: mlxsw: Add blackhole nexthop configuration tests
selftests: forwarding: Add blackhole nexthops tests
devlink: Add blackhole_nexthop trap
mlxsw: spectrum_trap: Add blackhole_nexthop trap
selftests: mlxsw: Add blackhole_nexthop trap test
.../networking/devlink/devlink-trap.rst | 4 +
.../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 9 +-
.../ethernet/mellanox/mlxsw/spectrum_router.c | 92 ++++++++++++++++---
.../ethernet/mellanox/mlxsw/spectrum_router.h | 2 +
.../ethernet/mellanox/mlxsw/spectrum_trap.c | 8 +-
drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 +
include/net/devlink.h | 4 +-
net/core/devlink.c | 1 +
.../net/mlxsw/devlink_trap_l3_drops.sh | 36 ++++++++
.../selftests/drivers/net/mlxsw/rtnetlink.sh | 25 ++++-
.../net/forwarding/router_mpath_nh.sh | 58 +++++++++++-
11 files changed, 218 insertions(+), 22 deletions(-)
--
2.28.0
^ permalink raw reply
* Re: [PATCH net-next 2/3] mlxsw: spectrum_ptp: use PTP wide message type definitions
From: Ido Schimmel @ 2020-11-23 6:59 UTC (permalink / raw)
To: Christian Eggers
Cc: Richard Cochran, Andrew Lunn, Heiner Kallweit, Jakub Kicinski,
Vladimir Oltean, Russell King, David S . Miller, netdev,
linux-kernel, Christian Eggers, Petr Machata, Jiri Pirko,
Ido Schimmel
In-Reply-To: <20201122143555.GA515025@shredder.lan>
On Sun, Nov 22, 2020 at 04:35:58PM +0200, Ido Schimmel wrote:
> Anyway, I added all six patches to our regression as we have some PTP
> tests. Will let you know tomorrow.
Looks good
^ permalink raw reply
* Re: [PATCH net-next v4 2/5] net/lapb: support netdev events
From: Martin Schiller @ 2020-11-23 6:55 UTC (permalink / raw)
To: Xie He
Cc: Andrew Hendry, David S. Miller, Jakub Kicinski, Linux X25,
Linux Kernel Network Developers, LKML
In-Reply-To: <CAJht_EP_oqCDs6mMThBZNtz4sgpbyQgMhKkHeqfS_7JmfEzfQg@mail.gmail.com>
On 2020-11-21 00:50, Xie He wrote:
> On Fri, Nov 20, 2020 at 3:11 PM Xie He <xie.he.0141@gmail.com> wrote:
>>
>> Should we also handle the NETDEV_UP event here? In previous versions
>> of this patch series you seemed to want to establish the L2 connection
>> on device-up. But in this patch, you didn't handle NETDEV_UP.
>>
>> Maybe on device-up, we need to check if the carrier is up, and if it
>> is, we do the same thing as we do on carrier-up.
>
> Are the device up/down status and the carrier up/down status
> independent of each other? If they are, on device-up or carrier-up, we
> only need to try establishing the L2 connection if we see both are up.
No, they aren't independent. The carrier can only be up if the device /
interface is UP. And as far as I can see a NETDEV_CHANGE event will also
only be generated on interfaces that are UP.
So you can be sure, that if there is a NETDEV_CHANGE event then the
device is UP.
I removed the NETDEV_UP handling because I don't think it makes sense
to implicitly try to establish layer2 (LAPB) if there is no carrier.
And with the first X.25 connection request on that interface, it will
be established anyway by x25_transmit_link().
I've tested it here with an HDLC WAN Adapter and it works as expected.
These are also the ideal conditions for the already mentioned "on
demand" scenario. The only necessary change would be to call
x25_terminate_link() on an interface after clearing the last X.25
session.
> On NETDEV_GOING_DOWN, we can also check the carrier status first and
> if it is down, we don't need to call lapb_disconnect_request.
This is not necessary because lapb_disconnect_request() checks the
current state. And if the carrier is DOWN then the state should also be
LAPB_STATE_0 and so lapb_disconnect_request() does nothing.
^ permalink raw reply
* Re: [PATCH net-next v5] net/tun: Call netdev notifiers
From: Martin Schiller @ 2020-11-23 6:18 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: davem, netdev, linux-kernel
In-Reply-To: <20201120102827.6b432dc5@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com>
On 2020-11-20 19:28, Jakub Kicinski wrote:
> On Wed, 18 Nov 2020 07:39:19 +0100 Martin Schiller wrote:
>> Call netdev notifiers before and after changing the device type.
>>
>> Signed-off-by: Martin Schiller <ms@dev.tdt.de>
>
> This is a fix, right? Can you give an example of something that goes
> wrong without this patch?
This change is related to my latest patches to the X.25 Subsystem:
https://patchwork.kernel.org/project/netdevbpf/list/?series=388087
I use a tun interface in a XoT (X.25 over TCP) application and use the
TUNSETLINK ioctl to change the device type to ARPHRD_X25.
As the default device type is ARPHRD_NONE the initial NETDEV_REGISTER
event won't be catched by the X.25 Stack.
Therefore I have to use the NETDEV_POST_TYPE_CHANGE to make sure that
the corresponding neighbour structure is created.
I could imagine that other protocols have similar requirements.
Whether this is a fix or a functional extension is hard to say.
Some time ago there was also a corresponding patch for the WAN/HDLC
subsystem:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=2f8364a291e8
^ permalink raw reply
* [PATCH net v2] devlink: Fix reload stats structure
From: Moshe Shemesh @ 2020-11-23 5:36 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Jiri Pirko, netdev, linux-kernel, Moshe Shemesh
Fix reload stats structure exposed to the user. Change stats structure
hierarchy to have the reload action as a parent of the stat entry and
then stat entry includes value per limit. This will also help to avoid
string concatenation on iproute2 output.
Reload stats structure before this fix:
"stats": {
"reload": {
"driver_reinit": 2,
"fw_activate": 1,
"fw_activate_no_reset": 0
}
}
After this fix:
"stats": {
"reload": {
"driver_reinit": {
"unspecified": 2
},
"fw_activate": {
"unspecified": 1,
"no_reset": 0
}
}
Fixes: a254c264267e ("devlink: Add reload stats")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
v1 -> v2:
- Fold comment at 80 characters.
---
include/uapi/linux/devlink.h | 2 ++
net/core/devlink.c | 49 +++++++++++++++++++++++-------------
2 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 0113bc4db9f5..5203f54a2be1 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -526,6 +526,8 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_STATS_LIMIT, /* u8 */
DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */
DEVLINK_ATTR_REMOTE_RELOAD_STATS, /* nested */
+ DEVLINK_ATTR_RELOAD_ACTION_INFO, /* nested */
+ DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */
/* add new attributes above here, update the policy in devlink.c */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e6fb1fdedded..bf79d018990c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -517,8 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
return test_bit(limit, &devlink->ops->reload_limits);
}
-static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
- enum devlink_reload_limit limit, u32 value)
+static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_limit limit, u32 value)
{
struct nlattr *reload_stats_entry;
@@ -526,8 +525,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti
if (!reload_stats_entry)
return -EMSGSIZE;
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
- nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
goto nla_put_failure;
nla_nest_end(msg, reload_stats_entry);
@@ -540,7 +538,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti
static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
{
- struct nlattr *reload_stats_attr;
+ struct nlattr *reload_stats_attr, *action_info_attr, *action_stats_attr;
int i, j, stat_idx;
u32 value;
@@ -552,17 +550,28 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
if (!reload_stats_attr)
return -EMSGSIZE;
- for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
- /* Remote stats are shown even if not locally supported. Stats
- * of actions with unspecified limit are shown though drivers
- * don't need to register unspecified limit.
- */
- if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
- !devlink_reload_limit_is_supported(devlink, j))
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC)
continue;
- for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
- if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
- i == DEVLINK_RELOAD_ACTION_UNSPEC ||
+ action_info_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+ if (!action_info_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+ goto action_info_nest_cancel;
+ action_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+ if (!action_stats_attr)
+ goto action_info_nest_cancel;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ /* Remote stats are shown even if not locally supported.
+ * Stats of actions with unspecified limit are shown
+ * though drivers don't need to register unspecified
+ * limit.
+ */
+ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j)) ||
devlink_reload_combination_is_invalid(i, j))
continue;
@@ -571,13 +580,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
value = devlink->stats.reload_stats[stat_idx];
else
value = devlink->stats.remote_reload_stats[stat_idx];
- if (devlink_reload_stat_put(msg, i, j, value))
- goto nla_put_failure;
+ if (devlink_reload_stat_put(msg, j, value))
+ goto action_stats_nest_cancel;
}
+ nla_nest_end(msg, action_stats_attr);
+ nla_nest_end(msg, action_info_attr);
}
nla_nest_end(msg, reload_stats_attr);
return 0;
+action_stats_nest_cancel:
+ nla_nest_cancel(msg, action_stats_attr);
+action_info_nest_cancel:
+ nla_nest_cancel(msg, action_info_attr);
nla_put_failure:
nla_nest_cancel(msg, reload_stats_attr);
return -EMSGSIZE;
--
2.18.2
^ permalink raw reply related
* Re: [PATCH net] devlink: Fix reload stats structure
From: Moshe Shemesh @ 2020-11-23 5:19 UTC (permalink / raw)
To: Jakub Kicinski, Moshe Shemesh
Cc: David S. Miller, Jiri Pirko, netdev, linux-kernel
In-Reply-To: <20201121145349.3824029c@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com>
On 11/22/2020 12:53 AM, Jakub Kicinski wrote:
> External email: Use caution opening links or attachments
>
>
> On Fri, 20 Nov 2020 15:40:37 +0200 Moshe Shemesh wrote:
>> Fix reload stats structure exposed to the user. Change stats structure
>> hierarchy to have the reload action as a parent of the stat entry and
>> then stat entry includes value per limit. This will also help to avoid
>> string concatenation on iproute2 output.
>>
>> Reload stats structure before this fix:
>> "stats": {
>> "reload": {
>> "driver_reinit": 2,
>> "fw_activate": 1,
>> "fw_activate_no_reset": 0
>> }
>> }
>>
>> After this fix:
>> "stats": {
>> "reload": {
>> "driver_reinit": {
>> "unspecified": 2
>> },
>> "fw_activate": {
>> "unspecified": 1,
>> "no_reset": 0
>> }
>> }
>>
>> Fixes: a254c264267e ("devlink: Add reload stats")
>> Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
>> Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> At least try to fold the core networking code at 80 characters *please*.
>
> You folded the comments at 86 chars, neither 100 nor 80.
Oh, I missed that comment folding while replacing it in this patch. I
will fix, thanks.
^ permalink raw reply
* Re: [EXT] [PATCH] aquantia: Remove the build_skb path
From: Ramsay, Lincoln @ 2020-11-23 4:20 UTC (permalink / raw)
To: Igor Russkikh, David S. Miller, Jakub Kicinski,
netdev@vger.kernel.org, Dmitry Bogdanov [C]
In-Reply-To: <12fbca7a-86c9-ab97-d052-2a5cb0a4f145@marvell.com>
> Yep, that could be the only way to fix this for now.
> Have you tried to estimate any performance drops from this?
Unfortunately, I am not in a very good position to do this. The 10G interfaces on our device don't actually have enough raw PCI bandwidth available to hit 10G transfer rates.
I did use iperf3 and saw bursts over 2Gbit/sec (with average closer to 1.3Gbit/sec on a good run). There was no significant difference between running with and without the patch. I am told that this is about as good as can be expected.
Make of that what you will :)
Lincoln
^ permalink raw reply
* [PATCH net-next] net: ethernet: mediatek: support setting MTU
From: DENG Qingfang @ 2020-11-23 3:45 UTC (permalink / raw)
To: netdev, Felix Fietkau, John Crispin, Sean Wang, Mark Lee,
David S . Miller, Jakub Kicinski, Matthias Brugger, Russell King,
linux-arm-kernel, linux-mediatek
Cc: Frank Wunderlich, David Woodhouse, Andrew Lunn, Rene van Dorst
MT762x HW, except for MT7628, supports frame length up to 2048
(maximum length on GDM), so allow setting MTU up to 2030.
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
---
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 36 ++++++++++++++++++++-
drivers/net/ethernet/mediatek/mtk_eth_soc.h | 11 +++++--
2 files changed, 43 insertions(+), 4 deletions(-)
Changes RFC -> v1:
Exclude MT7628
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6d2d60675ffd..27cae3f43972 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -353,7 +353,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
/* Setup gmac */
mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
mcr_new = mcr_cur;
- mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
+ mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
/* Only update control register when needed! */
@@ -2499,6 +2499,39 @@ static void mtk_uninit(struct net_device *dev)
mtk_rx_irq_disable(eth, ~0);
}
+static int mtk_change_mtu(struct net_device *dev, int new_mtu)
+{
+ int length = new_mtu + MTK_RX_ETH_HLEN;
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ u32 mcr_cur, mcr_new;
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ if (length > 1536)
+ return -EINVAL;
+ } else {
+ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+ mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_LEN_MASK;
+
+ if (length <= 1518)
+ mcr_new |= MAC_MCR_MAX_RX_LEN(MAC_MCR_MAX_RX_LEN_1518);
+ else if (length <= 1536)
+ mcr_new |= MAC_MCR_MAX_RX_LEN(MAC_MCR_MAX_RX_LEN_1536);
+ else if (length <= 1552)
+ mcr_new |= MAC_MCR_MAX_RX_LEN(MAC_MCR_MAX_RX_LEN_1552);
+ else
+ mcr_new |= MAC_MCR_MAX_RX_LEN(MAC_MCR_MAX_RX_LEN_2048);
+
+ /* Only update control register when needed! */
+ if (mcr_new != mcr_cur)
+ mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id));
+ }
+
+ dev->mtu = new_mtu;
+
+ return 0;
+}
+
static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct mtk_mac *mac = netdev_priv(dev);
@@ -2795,6 +2828,7 @@ static const struct net_device_ops mtk_netdev_ops = {
.ndo_set_mac_address = mtk_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_do_ioctl = mtk_do_ioctl,
+ .ndo_change_mtu = mtk_change_mtu,
.ndo_tx_timeout = mtk_tx_timeout,
.ndo_get_stats64 = mtk_get_stats64,
.ndo_fix_features = mtk_fix_features,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 454cfcd465fd..cfc11654ccd6 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -17,12 +17,12 @@
#include <linux/phylink.h>
#define MTK_QDMA_PAGE_SIZE 2048
-#define MTK_MAX_RX_LENGTH 1536
+#define MTK_MAX_RX_LENGTH 2048
#define MTK_TX_DMA_BUF_LEN 0x3fff
#define MTK_DMA_SIZE 256
#define MTK_NAPI_WEIGHT 64
#define MTK_MAC_COUNT 2
-#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
#define MTK_DMA_DUMMY_DESC 0xffffffff
#define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
@@ -320,7 +320,12 @@
/* Mac control registers */
#define MTK_MAC_MCR(x) (0x10100 + (x * 0x100))
-#define MAC_MCR_MAX_RX_1536 BIT(24)
+#define MAC_MCR_MAX_RX_LEN_MASK GENMASK(25, 24)
+#define MAC_MCR_MAX_RX_LEN(_x) (MAC_MCR_MAX_RX_LEN_MASK & ((_x) << 24))
+#define MAC_MCR_MAX_RX_LEN_1518 0x0
+#define MAC_MCR_MAX_RX_LEN_1536 0x1
+#define MAC_MCR_MAX_RX_LEN_1552 0x2
+#define MAC_MCR_MAX_RX_LEN_2048 0x3
#define MAC_MCR_IPG_CFG (BIT(18) | BIT(16))
#define MAC_MCR_FORCE_MODE BIT(15)
#define MAC_MCR_TX_EN BIT(14)
--
2.25.1
^ permalink raw reply related
* Re: [PATCH net-next v4 0/5] bonding: rename bond components
From: Jarod Wilson @ 2020-11-23 3:39 UTC (permalink / raw)
To: Jakub Kicinski
Cc: Jay Vosburgh, LKML, Veaceslav Falico, Andy Gospodarek,
David S. Miller, Thomas Davis, Netdev
In-Reply-To: <20201111140429.25ab20b1@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com>
On Wed, Nov 11, 2020 at 5:04 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 11 Nov 2020 12:13:56 -0800 Jay Vosburgh wrote:
> > Jarod Wilson <jarod@redhat.com> wrote:
> >
> > >The bonding driver's use of master and slave, while largely understood
> > >in technical circles, poses a barrier for inclusion to some potential
> > >members of the development and user community, due to the historical
> > >context of masters and slaves, particularly in the United States. This
> > >is a first full pass at replacing those phrases with more socially
> > >inclusive ones, opting for bond to replace master and port to
> > >replace slave, which is congruent with the bridge and team drivers.
> > >
> > >There are a few problems with this change. First up, "port" is used in
> > >the bonding 802.3ad code, so the first step here is to rename port to
> > >ad_port, so we can reuse port. Second, we have the issue of not wanting
> > >to break any existing userspace, which I believe this patchset
> > >accomplishes, preserving all existing sysfs and procfs interfaces, and
> > >adding module parameter aliases where necessary.
> > >
> > >Third, we do still have the issue of ease of backporting fixes to
> > >-stable trees. I've not had a huge amount of time to spend on it, but
> > >brief forays into coccinelle didn't really pay off (since it's meant to
> > >operate on code, not patches), and the best solution I can come up with
> > >is providing a shell script someone could run over git-format-patch
> > >output before git-am'ing the result to a -stable tree, though scripting
> > >these changes in the first place turned out to be not the best thing to
> > >do anyway, due to subtle cases where use of master or slave can NOT yet
> > >be replaced, so a large amount of work was done by hand, inspection,
> > >trial and error, which is why this set is a lot longer in coming than
> > >I'd originally hoped. I don't expect -stable backports to be horrible to
> > >figure out one way or another though, and I don't believe that a bit of
> > >inconvenience on that front is enough to warrant not making these
> > >changes.
> >
> > I think this undersells the impact a bit; this will most likely
> > break the majority of cherry-picks for the bonding driver to stable
> > going forward should this patch set be committed. Yes, the volume of
> > patches to bonding is relatively low, and the manual backports are not
> > likely to be technically difficult. Nevertheless, I expect that most
> > bonding backports to stable that cross this patch set will require
> > manual intervention.
> >
> > As such, I'd still like to see explicit direction from the
> > kernel development community leadership that change sets of this nature
> > (not technically driven, with long term maintenance implications) are
> > changes that should be undertaken rather than are merely permitted.
>
> Yeah, IDK. I think it's up to you as the maintainer of this code to
> make a call based on the specific circumstances. All we have AFAIK
> is the coding style statement which discourages new uses:
>
> For symbol names and documentation, avoid introducing new usage of
> 'master / slave' (or 'slave' independent of 'master') and 'blacklist /
> whitelist'.
>
> Recommended replacements for 'master / slave' are:
> '{primary,main} / {secondary,replica,subordinate}'
> '{initiator,requester} / {target,responder}'
> '{controller,host} / {device,worker,proxy}'
> 'leader / follower'
> 'director / performer'
>
> Recommended replacements for 'blacklist/whitelist' are:
> 'denylist / allowlist'
> 'blocklist / passlist'
>
> Exceptions for introducing new usage is to maintain a userspace ABI/API,
> or when updating code for an existing (as of 2020) hardware or protocol
> specification that mandates those terms. For new specifications
> translate specification usage of the terminology to the kernel coding
> standard where possible.
Haven't been able to put much time into this the past few weeks, too
many other things going on leading into the holidays... But I think
Red Hat's general stance on this is that leaving things the way they
are is akin to condoning them. For change to happen, change needs to
happen. I know this starts to get political in a hurry though. I'd
like to see the changes made, even if there's a bit of pain involved
(clearly, since I've dumped this much time and effort into it so far).
:)
I'll try to address issues raised with this version, including the
checkpatch bits, but it may not be until after the first of the year
at this point, with assorted projects trying to get wrapped up before
the holidays, then the holidays themselves, etc.
--
Jarod Wilson
jarod@redhat.com
^ permalink raw reply
* Re: [PATCH] compiler_attribute: remove CONFIG_ENABLE_MUST_CHECK
From: Masahiro Yamada @ 2020-11-23 3:36 UTC (permalink / raw)
To: Miguel Ojeda
Cc: Jason A. Donenfeld, Shuah Khan, linux-kernel,
open list:KERNEL SELFTEST FRAMEWORK, Network Development,
wireguard, clang-built-linux, Nick Desaulniers
In-Reply-To: <CANiq72nL7yxGj-Q6aOxG68967g_fB6=hDED0mTBrZ_SjC=U-Pg@mail.gmail.com>
On Sun, Nov 22, 2020 at 5:45 AM Miguel Ojeda
<miguel.ojeda.sandonis@gmail.com> wrote:
>
> On Sat, Nov 21, 2020 at 8:44 PM Masahiro Yamada <masahiroy@kernel.org> wrote:
> >
> > Our goal is to always enable __must_check where appreciate, so this
> > CONFIG option is no longer needed.
>
> This would be great. It also implies we can then move it to
> `compiler_attributes.h` since it does not depend on config options
> anymore.
>
> We should also rename it to `__nodiscard`, since that is the
> standardized name (coming soon to C2x and in C++ for years).
>
> Cc'ing the Clang folks too to make them aware.
>
I can move it to compiler_attribute.h
This attribute is supported by gcc, clang, and icc.
https://godbolt.org/z/ehd6so
I can send v2.
I do not mind renaming, but it should be done in a separate patch.
--
Best Regards
Masahiro Yamada
^ permalink raw reply
* [PATCH net] bonding: fix feature flag setting at init time
From: Jarod Wilson @ 2020-11-23 3:17 UTC (permalink / raw)
To: linux-kernel
Cc: Jarod Wilson, Ivan Vecera, Jay Vosburgh, Veaceslav Falico,
Andy Gospodarek, David S. Miller, Jakub Kicinski, Thomas Davis,
netdev
Have run into a case where bond_option_mode_set() gets called before
hw_features has been filled in, and very bad things happen when
netdev_change_features() then gets called, because the empty hw_features
wipes out almost all features. Further reading of netdev feature flag
documentation suggests drivers aren't supposed to touch wanted_features,
so this changes bond_option_mode_set() to use netdev_increment_features()
and &= ~BOND_XFRM_FEATURES on mode changes and then only calling
netdev_features_change() if there was actually a change of features. This
specifically fixes bonding on top of mlxsw interfaces, and has been
regression-tested with ixgbe interfaces. This change also simplifies the
xfrm-specific code in bond_setup() a little bit as well.
Fixes: a3b658cfb664 ("bonding: allow xfrm offload setup post-module-load")
Reported-by: Ivan Vecera <ivecera@redhat.com>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Thomas Davis <tadavis@lbl.gov>
Cc: netdev@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
---
drivers/net/bonding/bond_main.c | 10 ++++------
drivers/net/bonding/bond_options.c | 14 +++++++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 71c9677d135f..b8e0cb4f9480 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4721,15 +4721,13 @@ void bond_setup(struct net_device *bond_dev)
NETIF_F_HW_VLAN_CTAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
-#ifdef CONFIG_XFRM_OFFLOAD
- bond_dev->hw_features |= BOND_XFRM_FEATURES;
-#endif /* CONFIG_XFRM_OFFLOAD */
bond_dev->features |= bond_dev->hw_features;
bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
#ifdef CONFIG_XFRM_OFFLOAD
- /* Disable XFRM features if this isn't an active-backup config */
- if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
- bond_dev->features &= ~BOND_XFRM_FEATURES;
+ bond_dev->hw_features |= BOND_XFRM_FEATURES;
+ /* Only enable XFRM features if this is an active-backup config */
+ if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
+ bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
}
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 9abfaae1c6f7..bce34648d97d 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -748,6 +748,9 @@ const struct bond_option *bond_opt_get(unsigned int option)
static int bond_option_mode_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
+ netdev_features_t features = bond->dev->features;
+ netdev_features_t mask = features & BOND_XFRM_FEATURES;
+
if (!bond_mode_uses_arp(newval->value)) {
if (bond->params.arp_interval) {
netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
@@ -769,10 +772,15 @@ static int bond_option_mode_set(struct bonding *bond,
#ifdef CONFIG_XFRM_OFFLOAD
if (newval->value == BOND_MODE_ACTIVEBACKUP)
- bond->dev->wanted_features |= BOND_XFRM_FEATURES;
+ features = netdev_increment_features(features,
+ BOND_XFRM_FEATURES, mask);
else
- bond->dev->wanted_features &= ~BOND_XFRM_FEATURES;
- netdev_change_features(bond->dev);
+ features &= ~BOND_XFRM_FEATURES;
+
+ if (bond->dev->features != features) {
+ bond->dev->features = features;
+ netdev_features_change(bond->dev);
+ }
#endif /* CONFIG_XFRM_OFFLOAD */
/* don't cache arp_validate between modes */
--
2.28.0
^ permalink raw reply related
* Re: [PATCHv4 net-next 2/3] octeontx2-af: Add devlink health reporters for NPA
From: George Cherian @ 2020-11-23 2:49 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
kuba@kernel.org, davem@davemloft.net, Sunil Kovvuri Goutham,
Linu Cherian, Geethasowjanya Akula, masahiroy@kernel.org,
willemdebruijn.kernel@gmail.com, saeed@kernel.org
> -----Original Message-----
> From: Jiri Pirko <jiri@resnulli.us>
> Sent: Saturday, November 21, 2020 7:44 PM
> To: George Cherian <gcherian@marvell.com>
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org;
> kuba@kernel.org; davem@davemloft.net; Sunil Kovvuri Goutham
> <sgoutham@marvell.com>; Linu Cherian <lcherian@marvell.com>;
> Geethasowjanya Akula <gakula@marvell.com>; masahiroy@kernel.org;
> willemdebruijn.kernel@gmail.com; saeed@kernel.org
> Subject: Re: [PATCHv4 net-next 2/3] octeontx2-af: Add devlink health
> reporters for NPA
>
> Sat, Nov 21, 2020 at 05:02:00AM CET, george.cherian@marvell.com wrote:
> >Add health reporters for RVU NPA block.
> >NPA Health reporters handle following HW event groups
> > - GENERAL events
> > - ERROR events
> > - RAS events
> > - RVU event
> >An event counter per event is maintained in SW.
> >
> >Output:
> > # devlink health
> > pci/0002:01:00.0:
> > reporter npa
> > state healthy error 0 recover 0
> > # devlink health dump show pci/0002:01:00.0 reporter npa
> > NPA_AF_GENERAL:
> > Unmap PF Error: 0
> > Free Disabled for NIX0 RX: 0
> > Free Disabled for NIX0 TX: 0
> > Free Disabled for NIX1 RX: 0
> > Free Disabled for NIX1 TX: 0
>
> This is for 2 ports if I'm not mistaken. Then you need to have this reporter
> per-port. Register ports and have reporter for each.
>
No, these are not port specific reports.
NIX is the Network Interface Controller co-processor block.
There are (max of) 2 such co-processor blocks per SoC.
Moreover, this is an NPA (Network Pool/Buffer Allocator co- processor) reporter.
This tells whether a free or alloc operation is skipped due to the configurations set by
other co-processor blocks (NIX,SSO,TIM etc).
https://www.kernel.org/doc/html/latest/networking/device_drivers/ethernet/marvell/octeontx2.html
> NAK.
^ permalink raw reply
* [PATCH] wlcore: Switch to using the new API kobj_to_dev()
From: Tian Tao @ 2020-11-23 1:47 UTC (permalink / raw)
To: kvalo, davem, kuba, linux-wireless, netdev
Switch to using the new API kobj_to_dev().
Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
---
drivers/net/wireless/ti/wlcore/sysfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ti/wlcore/sysfs.c b/drivers/net/wireless/ti/wlcore/sysfs.c
index 7ac1814..5cf0379 100644
--- a/drivers/net/wireless/ti/wlcore/sysfs.c
+++ b/drivers/net/wireless/ti/wlcore/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buffer, loff_t pos, size_t count)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct wl1271 *wl = dev_get_drvdata(dev);
ssize_t len;
int ret;
--
2.7.4
^ permalink raw reply related
* Re: [kbuild-all] Re: [net-next,v2,4/5] seg6: add support for the SRv6 End.DT4 behavior
From: Rong Chen @ 2020-11-23 1:13 UTC (permalink / raw)
To: Jakub Kicinski, kernel test robot
Cc: Andrea Mayer, David S. Miller, David Ahern, Alexey Kuznetsov,
Hideaki YOSHIFUJI, Shuah Khan, Shrijeet Mukherjee,
Alexei Starovoitov, Daniel Borkmann, kbuild-all,
clang-built-linux, netdev
In-Reply-To: <20201113085730.5f3c850a@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com>
Hi Jakub,
Sorry for the inconvenience, we have optimized the build bot to avoid
this situation.
Best Regards,
Rong Chen
On 11/14/20 12:57 AM, Jakub Kicinski wrote:
> Good people of build bot,
>
> would you mind shedding some light on this one? It was also reported on
> v1, and Andrea said it's impossible to repro. Strange that build bot
> would make the same mistake twice, tho.
>
> Thanks!
>
> On Fri, 13 Nov 2020 17:23:09 +0800 kernel test robot wrote:
>> Hi Andrea,
>>
>> Thank you for the patch! Yet something to improve:
>>
>> [auto build test ERROR on ipvs/master]
>> [also build test ERROR on linus/master sparc-next/master v5.10-rc3 next-20201112]
>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>> And when submitting patch, we suggest to use '--base' as documented in
>> https://git-scm.com/docs/git-format-patch]
>>
>> url: https://github.com/0day-ci/linux/commits/Andrea-Mayer/seg6-add-support-for-the-SRv6-End-DT4-behavior/20201109-093019
>> base: https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git master
>> config: x86_64-randconfig-a005-20201111 (attached as .config)
>> compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 874b0a0b9db93f5d3350ffe6b5efda2d908415d0)
>> reproduce (this is a W=1 build):
>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>> chmod +x ~/bin/make.cross
>> # install x86_64 cross compiling tool for clang build
>> # apt-get install binutils-x86-64-linux-gnu
>> # https://github.com/0day-ci/linux/commit/761138e2f757ac64efe97b03311c976db242dc92
>> git remote add linux-review https://github.com/0day-ci/linux
>> git fetch --no-tags linux-review Andrea-Mayer/seg6-add-support-for-the-SRv6-End-DT4-behavior/20201109-093019
>> git checkout 761138e2f757ac64efe97b03311c976db242dc92
>> # save the attached .config to linux build tree
>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
>>
>> If you fix the issue, kindly add following tag as appropriate
>> Reported-by: kernel test robot <lkp@intel.com>
>>
>> All errors (new ones prefixed by >>):
>>
>>>> net/ipv6/seg6_local.c:793:4: error: field designator 'slwt_ops' does not refer to any field in type 'struct seg6_action_desc'
>> .slwt_ops = {
>> ^
>>>> net/ipv6/seg6_local.c:826:10: error: invalid application of 'sizeof' to an incomplete type 'struct seg6_action_desc []'
>> count = ARRAY_SIZE(seg6_action_table);
>> ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> include/linux/kernel.h:48:32: note: expanded from macro 'ARRAY_SIZE'
>> #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
>> ^~~~~
>> 2 errors generated.
>>
>> vim +793 net/ipv6/seg6_local.c
>>
>> 757
>> 758 static struct seg6_action_desc seg6_action_table[] = {
>> 759 {
>> 760 .action = SEG6_LOCAL_ACTION_END,
>> 761 .attrs = 0,
>> 762 .input = input_action_end,
>> 763 },
>> 764 {
>> 765 .action = SEG6_LOCAL_ACTION_END_X,
>> 766 .attrs = (1 << SEG6_LOCAL_NH6),
>> 767 .input = input_action_end_x,
>> 768 },
>> 769 {
>> 770 .action = SEG6_LOCAL_ACTION_END_T,
>> 771 .attrs = (1 << SEG6_LOCAL_TABLE),
>> 772 .input = input_action_end_t,
>> 773 },
>> 774 {
>> 775 .action = SEG6_LOCAL_ACTION_END_DX2,
>> 776 .attrs = (1 << SEG6_LOCAL_OIF),
>> 777 .input = input_action_end_dx2,
>> 778 },
>> 779 {
>> 780 .action = SEG6_LOCAL_ACTION_END_DX6,
>> 781 .attrs = (1 << SEG6_LOCAL_NH6),
>> 782 .input = input_action_end_dx6,
>> 783 },
>> 784 {
>> 785 .action = SEG6_LOCAL_ACTION_END_DX4,
>> 786 .attrs = (1 << SEG6_LOCAL_NH4),
>> 787 .input = input_action_end_dx4,
>> 788 },
>> 789 {
>> 790 .action = SEG6_LOCAL_ACTION_END_DT4,
>> 791 .attrs = (1 << SEG6_LOCAL_TABLE),
>> 792 .input = input_action_end_dt4,
>> > 793 .slwt_ops = {
>> 794 .build_state = seg6_end_dt4_build,
>> 795 },
>> 796 },
>> 797 {
>> 798 .action = SEG6_LOCAL_ACTION_END_DT6,
>> 799 .attrs = (1 << SEG6_LOCAL_TABLE),
>> 800 .input = input_action_end_dt6,
>> 801 },
>> 802 {
>> 803 .action = SEG6_LOCAL_ACTION_END_B6,
>> 804 .attrs = (1 << SEG6_LOCAL_SRH),
>> 805 .input = input_action_end_b6,
>> 806 },
>> 807 {
>> 808 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
>> 809 .attrs = (1 << SEG6_LOCAL_SRH),
>> 810 .input = input_action_end_b6_encap,
>> 811 .static_headroom = sizeof(struct ipv6hdr),
>> 812 },
>> 813 {
>> 814 .action = SEG6_LOCAL_ACTION_END_BPF,
>> 815 .attrs = (1 << SEG6_LOCAL_BPF),
>> 816 .input = input_action_end_bpf,
>> 817 },
>> 818
>> 819 };
>> 820
>> 821 static struct seg6_action_desc *__get_action_desc(int action)
>> 822 {
>> 823 struct seg6_action_desc *desc;
>> 824 int i, count;
>> 825
>> > 826 count = ARRAY_SIZE(seg6_action_table);
>> 827 for (i = 0; i < count; i++) {
>> 828 desc = &seg6_action_table[i];
>> 829 if (desc->action == action)
>> 830 return desc;
>> 831 }
>> 832
>> 833 return NULL;
>> 834 }
>> 835
>>
>> ---
>> 0-DAY CI Kernel Test Service, Intel Corporation
>> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
> _______________________________________________
> kbuild-all mailing list -- kbuild-all@lists.01.org
> To unsubscribe send an email to kbuild-all-leave@lists.01.org
^ permalink raw reply
* Re: [RFC] MAINTAINERS tag for cleanup robot
From: Joe Perches @ 2020-11-23 0:53 UTC (permalink / raw)
To: Finn Thain
Cc: James Bottomley, Tom Rix, Matthew Wilcox, clang-built-linux,
linux-hyperv, linux-kernel, xen-devel, tboot-devel, kvm,
linux-crypto, linux-acpi, devel, amd-gfx, dri-devel, intel-gfx,
netdev, linux-media, MPT-FusionLinux.pdl, linux-scsi,
linux-wireless, ibm-acpi-devel, platform-driver-x86, linux-usb,
linux-omap, linux-fbdev, ecryptfs, linux-fsdevel, cluster-devel,
linux-mtd, keyrings, netfilter-devel, coreteam, alsa-devel, bpf,
linux-bluetooth, linux-nfs, patches
In-Reply-To: <alpine.LNX.2.23.453.2011230810210.7@nippy.intranet>
On Mon, 2020-11-23 at 09:33 +1100, Finn Thain wrote:
> On Sun, 22 Nov 2020, Joe Perches wrote:
> > But provably correct conversions IMO _should_ be done and IMO churn
> > considerations should generally have less importance.
[]
> Moreover, the patch review workload for skilled humans is being generated
> by the automation, which is completely backwards: the machine is supposed
> to be helping.
Which is why the provably correct matters.
coccinelle transforms can be, but are not necessarily, provably correct.
The _show transforms done via the sysfs_emit_dev.cocci script are correct
as in commit aa838896d87a ("drivers core: Use sysfs_emit and sysfs_emit_at
for show(device *...) functions")
Worthwhile? A different question, but I think yes as it reduces the
overall question space of the existing other sprintf overrun possibilities.
^ permalink raw reply
* Re: [RFC PATCH bpf-next 3/8] tcp: Migrate TCP_ESTABLISHED/TCP_SYN_RECV sockets in accept queues.
From: Martin KaFai Lau @ 2020-11-23 0:40 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: ast, benh, bpf, daniel, davem, edumazet, kuba, kuni1840,
linux-kernel, netdev
In-Reply-To: <20201121101322.97015-1-kuniyu@amazon.co.jp>
On Sat, Nov 21, 2020 at 07:13:22PM +0900, Kuniyuki Iwashima wrote:
> From: Martin KaFai Lau <kafai@fb.com>
> Date: Thu, 19 Nov 2020 17:53:46 -0800
> > On Fri, Nov 20, 2020 at 07:09:22AM +0900, Kuniyuki Iwashima wrote:
> > > From: Martin KaFai Lau <kafai@fb.com>
> > > Date: Wed, 18 Nov 2020 15:50:17 -0800
> > > > On Tue, Nov 17, 2020 at 06:40:18PM +0900, Kuniyuki Iwashima wrote:
> > > > > This patch lets reuseport_detach_sock() return a pointer of struct sock,
> > > > > which is used only by inet_unhash(). If it is not NULL,
> > > > > inet_csk_reqsk_queue_migrate() migrates TCP_ESTABLISHED/TCP_SYN_RECV
> > > > > sockets from the closing listener to the selected one.
> > > > >
> > > > > Listening sockets hold incoming connections as a linked list of struct
> > > > > request_sock in the accept queue, and each request has reference to a full
> > > > > socket and its listener. In inet_csk_reqsk_queue_migrate(), we unlink the
> > > > > requests from the closing listener's queue and relink them to the head of
> > > > > the new listener's queue. We do not process each request, so the migration
> > > > > completes in O(1) time complexity. However, in the case of TCP_SYN_RECV
> > > > > sockets, we will take special care in the next commit.
> > > > >
> > > > > By default, we select the last element of socks[] as the new listener.
> > > > > This behaviour is based on how the kernel moves sockets in socks[].
> > > > >
> > > > > For example, we call listen() for four sockets (A, B, C, D), and close the
> > > > > first two by turns. The sockets move in socks[] like below. (See also [1])
> > > > >
> > > > > socks[0] : A <-. socks[0] : D socks[0] : D
> > > > > socks[1] : B | => socks[1] : B <-. => socks[1] : C
> > > > > socks[2] : C | socks[2] : C --'
> > > > > socks[3] : D --'
> > > > >
> > > > > Then, if C and D have newer settings than A and B, and each socket has a
> > > > > request (a, b, c, d) in their accept queue, we can redistribute old
> > > > > requests evenly to new listeners.
> > > > I don't think it should emphasize/claim there is a specific way that
> > > > the kernel-pick here can redistribute the requests evenly. It depends on
> > > > how the application close/listen. The userspace can not expect the
> > > > ordering of socks[] will behave in a certain way.
> > >
> > > I've expected replacing listeners by generations as a general use case.
> > > But exactly. Users should not expect the undocumented kernel internal.
> > >
> > >
> > > > The primary redistribution policy has to depend on BPF which is the
> > > > policy defined by the user based on its application logic (e.g. how
> > > > its binary restart work). The application (and bpf) knows which one
> > > > is a dying process and can avoid distributing to it.
> > > >
> > > > The kernel-pick could be an optional fallback but not a must. If the bpf
> > > > prog is attached, I would even go further to call bpf to redistribute
> > > > regardless of the sysctl, so I think the sysctl is not necessary.
> > >
> > > I also think it is just an optional fallback, but to pick out a different
> > > listener everytime, choosing the moved socket was reasonable. So the even
> > > redistribution for a specific use case is a side effect of such socket
> > > selection.
> > >
> > > But, users should decide to use either way:
> > > (1) let the kernel select a new listener randomly
> > > (2) select a particular listener by eBPF
> > >
> > > I will update the commit message like:
> > > The kernel selects a new listener randomly, but as the side effect, it can
> > > redistribute packets evenly for a specific case where an application
> > > replaces listeners by generations.
> > Since there is no feedback on sysctl, so may be something missed
> > in the lines.
>
> I'm sorry, I have missed this point while thinking about each reply...
>
>
> > I don't think this migration logic should depend on a sysctl.
> > At least not when a bpf prog is attached that is capable of doing
> > migration, it is too fragile to ask user to remember to turn on
> > the sysctl before attaching the bpf prog.
> >
> > Your use case is to primarily based on bpf prog to pick or only based
> > on kernel to do a random pick?
Again, what is your primarily use case?
>
> I think we have to care about both cases.
>
> I think we can always enable the migration feature if eBPF prog is not
> attached. On the other hand, if BPF_PROG_TYPE_SK_REUSEPORT prog is attached
> to select a listener by some rules, along updating the kernel,
> redistributing requests without user intention can break the application.
> So, there is something needed to confirm user intension at least if eBPF
> prog is attached.
Right, something being able to tell if the bpf prog can do migration
can confirm the user intention here. However, this will not be a
sysctl.
A new bpf_attach_type "BPF_SK_REUSEPORT_SELECT_OR_MIGRATE" can be added.
"prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE"
can be used to decide if migration can be done by the bpf prog.
Although the prog->expected_attach_type has not been checked for
BPF_PROG_TYPE_SK_REUSEPORT, there was an earlier discussion
that the risk of breaking is very small and is acceptable.
Instead of depending on !reuse_md->data to decide if it
is doing migration or not, a clearer signal should be given
to the bpf prog. A "u8 migration" can be added to "struct sk_reuseport_kern"
(and to "struct sk_reuseport_md" accordingly). It can tell
the bpf prog that it is doing migration. It should also tell if it is
migrating a list of established sk(s) or an individual req_sk.
Accessing "reuse_md->migration" should only be allowed for
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE during is_valid_access().
During migration, if skb is not available, an empty skb can be used.
Migration is a slow path and does not happen very often, so it will
be fine even it has to create a temp skb (or may be a static const skb
can be used, not sure but this is implementation details).
>
> But honestly, I believe such eBPF users can follow this change and
> implement migration eBPF prog if we introduce such a breaking change.
>
>
> > Also, IIUC, this sysctl setting sticks at "*reuse", there is no way to
> > change it until all the listening sockets are closed which is exactly
> > the service disruption problem this series is trying to solve here.
>
> Oh, exactly...
> If we apply this series by live patching, we cannot enable the feature
> without service disruption.
>
> To enable the migration feature dynamically, how about this logic?
> In this logic, we do not save the sysctl value and check it at each time.
>
> 1. no eBPF prog attached -> ON
> 2. eBPF prog attached and sysctl is 0 -> OFF
No. When bpf prog is attached and it clearly signals (expected_attach_type
here) it can do migration, it should not depend on anything else. It is very
confusing to use. When a prog is successfully loaded, verified
and attached, it is expected to run.
This sysctl essentially only disables the bpf prog with
type == BPF_PROG_TYPE_SK_REUSEPORT running at a particular point.
This is going down a path that having another sysctl in the future
to disable another bpf prog type. If there would be a need to disable
bpf prog on a type-by-type bases, it would need a more
generic solution on the bpf side and do it in a consistent way
for all prog types. It needs a separate and longer discussion.
All behaviors of the BPF_SK_REUSEPORT_SELECT_OR_MIGRATE bpf prog
should not depend on this sysctl at all .
/* Pseudo code to show the idea only.
* Actual implementation should try to fit
* better into the current code and should look
* quite different from here.
*/
if ((prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* call bpf to migrate */
action = BPF_PROG_RUN(prog, &reuse_kern);
if (action == SK_PASS) {
if (!reuse_kern.selected_sk)
/* fallback to kernel random pick */
else
/* migrate to reuse_kern.selected_sk */
} else {
/* action == SK_DROP. don't do migration at all and
* don't fallback to kernel random pick.
*/
}
}
Going back to the sysctl, with BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
do you still have a need on adding sysctl_tcp_migrate_req?
Regardless, if there is still a need,
the document for sysctl_tcp_migrate_req should be something like:
"the kernel will do a random pick when there is no bpf prog
attached to the reuseport group...."
[ ps, my reply will be slow in this week. ]
> 3. eBPF prog attached and sysctl is 1 -> ON
>
> So, replacing
>
> if (reuse->migrate_req)
>
> to
>
> if (!reuse->prog || net->ipv4.sysctl_tcp_migrate_req)
^ permalink raw reply
* Re: [PATCH net-next] net: sched: alias action flags with TCA_ACT_ prefix
From: Jamal Hadi Salim @ 2020-11-23 0:11 UTC (permalink / raw)
To: Vlad Buslov, netdev, davem, kuba; +Cc: xiyou.wangcong, jiri
In-Reply-To: <20201121160902.808705-1-vlad@buslov.dev>
On 2020-11-21 11:09 a.m., Vlad Buslov wrote:
> Currently both filter and action flags use same "TCA_" prefix which makes
> them hard to distinguish to code and confusing for users. Create aliases
> for existing action flags constants with "TCA_ACT_" prefix.
>
> Signed-off-by: Vlad Buslov <vlad@buslov.dev>
LGTM, thanks for the effort
.
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
cheers,
jamal
^ permalink raw reply
* [PATCH net-next 2/2] ath10k: Constify static qmi structs
From: Rikard Falkeborn @ 2020-11-22 23:40 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Alex Elder, Kalle Valo, netdev, linux-kernel, ath10k,
linux-wireless, Rikard Falkeborn
In-Reply-To: <20201122234031.33432-1-rikard.falkeborn@gmail.com>
qmi_msg_handler[] and ath10k_qmi_ops are only used as input arguments
to qmi_handle_init() which accepts const pointers to both qmi_ops and
qmi_msg_handler. Make them const to allow the compiler to put them in
read-only memory.
Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
---
drivers/net/wireless/ath/ath10k/qmi.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c
index ae6b1f402adf..07e478f9a808 100644
--- a/drivers/net/wireless/ath/ath10k/qmi.c
+++ b/drivers/net/wireless/ath/ath10k/qmi.c
@@ -917,7 +917,7 @@ static void ath10k_qmi_msa_ready_ind(struct qmi_handle *qmi_hdl,
ath10k_qmi_driver_event_post(qmi, ATH10K_QMI_EVENT_MSA_READY_IND, NULL);
}
-static struct qmi_msg_handler qmi_msg_handler[] = {
+static const struct qmi_msg_handler qmi_msg_handler[] = {
{
.type = QMI_INDICATION,
.msg_id = QMI_WLFW_FW_READY_IND_V01,
@@ -981,7 +981,7 @@ static void ath10k_qmi_del_server(struct qmi_handle *qmi_hdl,
NULL);
}
-static struct qmi_ops ath10k_qmi_ops = {
+static const struct qmi_ops ath10k_qmi_ops = {
.new_server = ath10k_qmi_new_server,
.del_server = ath10k_qmi_del_server,
};
--
2.29.2
^ permalink raw reply related
* [PATCH net-next 1/2] soc: qcom: ipa: Constify static qmi structs
From: Rikard Falkeborn @ 2020-11-22 23:40 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Alex Elder, Kalle Valo, netdev, linux-kernel, ath10k,
linux-wireless, Rikard Falkeborn
In-Reply-To: <20201122234031.33432-1-rikard.falkeborn@gmail.com>
These are only used as input arguments to qmi_handle_init() which
accepts const pointers to both qmi_ops and qmi_msg_handler. Make them
const to allow the compiler to put them in read-only memory.
Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
---
drivers/net/ipa/ipa_qmi.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index 5090f0f923ad..d2c3f273c233 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -168,7 +168,7 @@ static void ipa_server_bye(struct qmi_handle *qmi, unsigned int node)
ipa_qmi->indication_sent = false;
}
-static struct qmi_ops ipa_server_ops = {
+static const struct qmi_ops ipa_server_ops = {
.bye = ipa_server_bye,
};
@@ -234,7 +234,7 @@ static void ipa_server_driver_init_complete(struct qmi_handle *qmi,
}
/* The server handles two request message types sent by the modem. */
-static struct qmi_msg_handler ipa_server_msg_handlers[] = {
+static const struct qmi_msg_handler ipa_server_msg_handlers[] = {
{
.type = QMI_REQUEST,
.msg_id = IPA_QMI_INDICATION_REGISTER,
@@ -261,7 +261,7 @@ static void ipa_client_init_driver(struct qmi_handle *qmi,
}
/* The client handles one response message type sent by the modem. */
-static struct qmi_msg_handler ipa_client_msg_handlers[] = {
+static const struct qmi_msg_handler ipa_client_msg_handlers[] = {
{
.type = QMI_RESPONSE,
.msg_id = IPA_QMI_INIT_DRIVER,
@@ -463,7 +463,7 @@ ipa_client_new_server(struct qmi_handle *qmi, struct qmi_service *svc)
return 0;
}
-static struct qmi_ops ipa_client_ops = {
+static const struct qmi_ops ipa_client_ops = {
.new_server = ipa_client_new_server,
};
--
2.29.2
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox