Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 05/10] mlxsw: spectrum_router: Add support for blackhole nexthops
From: Ido Schimmel @ 2020-11-23  7:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Add support for blackhole nexthops by programming them to the adjacency
table with a discard action.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 .../ethernet/mellanox/mlxsw/spectrum_dpipe.c  |  9 ++--
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 48 ++++++++++++++++---
 .../ethernet/mellanox/mlxsw/spectrum_router.h |  1 +
 3 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index daf029931b5f..ed81d4fa48ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -913,7 +913,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
 
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
 		if (mlxsw_sp_nexthop_offload(nh) &&
-		    !mlxsw_sp_nexthop_group_has_ipip(nh))
+		    !mlxsw_sp_nexthop_group_has_ipip(nh) &&
+		    !mlxsw_sp_nexthop_is_discard(nh))
 			size++;
 	return size;
 }
@@ -1105,7 +1106,8 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
 	nh_count = 0;
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
 		if (!mlxsw_sp_nexthop_offload(nh) ||
-		    mlxsw_sp_nexthop_group_has_ipip(nh))
+		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
+		    mlxsw_sp_nexthop_is_discard(nh))
 			continue;
 
 		if (nh_count < nh_skip)
@@ -1186,7 +1188,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
 		if (!mlxsw_sp_nexthop_offload(nh) ||
-		    mlxsw_sp_nexthop_group_has_ipip(nh))
+		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
+		    mlxsw_sp_nexthop_is_discard(nh))
 			continue;
 
 		mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index ef0e4e452f47..d551e9bc373c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2858,9 +2858,10 @@ struct mlxsw_sp_nexthop {
 	   offloaded:1, /* set in case the neigh is actually put into
 			 * KVD linear area of this group.
 			 */
-	   update:1; /* set indicates that MAC of this neigh should be
+	   update:1, /* set indicates that MAC of this neigh should be
 		      * updated in HW
 		      */
+	   discard:1; /* nexthop is programmed to discard packets */
 	enum mlxsw_sp_nexthop_type type;
 	union {
 		struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -3011,6 +3012,11 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
 	return false;
 }
 
+bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
+{
+	return nh->discard;
+}
+
 struct mlxsw_sp_nexthop_group_cmp_arg {
 	enum mlxsw_sp_nexthop_group_type type;
 	union {
@@ -3285,7 +3291,11 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
 			    adj_index, nh->rif->rif_index);
-	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+	if (nh->discard)
+		mlxsw_reg_ratr_trap_action_set(ratr_pl,
+					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
+	else
+		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
 	if (nh->counter_valid)
 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
 	else
@@ -4128,9 +4138,7 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
 {
 	int err = -EINVAL;
 
-	if (nh->is_reject)
-		NL_SET_ERR_MSG_MOD(extack, "Blackhole nexthops are not supported");
-	else if (nh->is_fdb)
+	if (nh->is_fdb)
 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
 	else if (nh->has_encap)
 		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
@@ -4165,7 +4173,7 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
 		/* Device only nexthops with an IPIP device are programmed as
 		 * encapsulating adjacency entries.
 		 */
-		if (!nh->gw_family &&
+		if (!nh->gw_family && !nh->is_reject &&
 		    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
 			NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
 			return -EINVAL;
@@ -4199,10 +4207,31 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
 		return true;
 
 	dev = info->nh->dev;
-	return info->nh->gw_family ||
+	return info->nh->gw_family || info->nh->is_reject ||
 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
 }
 
+static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_nexthop *nh)
+{
+	u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
+
+	nh->discard = 1;
+	nh->should_offload = 1;
+	/* While nexthops that discard packets do not forward packets
+	 * via an egress RIF, they still need to be programmed using a
+	 * valid RIF, so use the loopback RIF created during init.
+	 */
+	nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
+}
+
+static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_nexthop *nh)
+{
+	nh->rif = NULL;
+	nh->should_offload = 0;
+}
+
 static int
 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
 			  struct mlxsw_sp_nexthop_group *nh_grp,
@@ -4236,6 +4265,9 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_type_init;
 
+	if (nh_obj->is_reject)
+		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
+
 	return 0;
 
 err_type_init:
@@ -4247,6 +4279,8 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_nexthop *nh)
 {
+	if (nh->discard)
+		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
 	list_del(&nh->router_list_node);
 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index f9a59d454e28..96d8bf7a9a67 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -201,6 +201,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
 			     u32 *p_adj_size, u32 *p_adj_hash_index);
 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh);
 #define mlxsw_sp_nexthop_for_each(nh, router)				\
 	for (nh = mlxsw_sp_nexthop_next(router, NULL); nh;		\
 	     nh = mlxsw_sp_nexthop_next(router, nh))
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 07/10] selftests: forwarding: Add blackhole nexthops tests
From: Ido Schimmel @ 2020-11-23  7:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Test that IPv4 and IPv6 ping fail when the route is using a blackhole
nexthop or a group with a blackhole nexthop. Test that ping passes when
the route starts using a valid nexthop.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 .../net/forwarding/router_mpath_nh.sh         | 58 ++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index e8c2573d5232..388e4492b81b 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,7 +1,13 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+	ping_ipv4_blackhole
+	ping_ipv6_blackhole
+"
 NUM_NETIFS=8
 source lib.sh
 
@@ -302,6 +308,56 @@ multipath_test()
 	multipath6_l4_test "Weighted MP 11:45" 11 45
 }
 
+ping_ipv4_blackhole()
+{
+	RET=0
+
+	ip nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
+	ping_do $h1 198.51.100.2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv4 blackhole ping"
+
+	ip nexthop del id 1002
+	ip nexthop del id 1001
+}
+
+ping_ipv6_blackhole()
+{
+	RET=0
+
+	ip -6 nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
+	ping6_do $h1 2001:db8:2::2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv6 blackhole ping"
+
+	ip nexthop del id 1002
+	ip -6 nexthop del id 1001
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 08/10] devlink: Add blackhole_nexthop trap
From: Ido Schimmel @ 2020-11-23  7:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Add a packet trap to report packets that were dropped due to a
blackhole nexthop.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 Documentation/networking/devlink/devlink-trap.rst | 4 ++++
 include/net/devlink.h                             | 4 +++-
 net/core/devlink.c                                | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index ef719ceac299..d875f3e1e9cf 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -476,6 +476,10 @@ be added to the following table:
    * - ``esp_parsing``
      - ``drop``
      - Traps packets dropped due to an error in the ESP header parsing
+   * - ``blackhole_nexthop``
+     - ``drop``
+     - Traps packets that the device decided to drop in case they hit a
+       blackhole nexthop
 
 Driver-specific Packet Traps
 ============================
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 457c537d0ef2..f466819cc477 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -835,6 +835,7 @@ enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_DCCP_PARSING,
 	DEVLINK_TRAP_GENERIC_ID_GTP_PARSING,
 	DEVLINK_TRAP_GENERIC_ID_ESP_PARSING,
+	DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP,
 
 	/* Add new generic trap IDs above */
 	__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -1058,7 +1059,8 @@ enum devlink_trap_group_generic_id {
 	"gtp_parsing"
 #define DEVLINK_TRAP_GENERIC_NAME_ESP_PARSING \
 	"esp_parsing"
-
+#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \
+	"blackhole_nexthop"
 
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
 	"l2_drops"
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e6fb1fdedded..7c05e8603bff 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -9490,6 +9490,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
 	DEVLINK_TRAP(DCCP_PARSING, DROP),
 	DEVLINK_TRAP(GTP_PARSING, DROP),
 	DEVLINK_TRAP(ESP_PARSING, DROP),
+	DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
 };
 
 #define DEVLINK_TRAP_GROUP(_id)						      \
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 06/10] selftests: mlxsw: Add blackhole nexthop configuration tests
From: Ido Schimmel @ 2020-11-23  7:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Test the mlxsw allows blackhole nexthops to be installed and that the
nexthops are marked as offloaded.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh  | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5de47d72f8c9..a2eff5f58209 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -32,6 +32,7 @@ ALL_TESTS="
 	nexthop_obj_invalid_test
 	nexthop_obj_offload_test
 	nexthop_obj_group_offload_test
+	nexthop_obj_blackhole_offload_test
 	nexthop_obj_route_offload_test
 	devlink_reload_test
 "
@@ -693,9 +694,6 @@ nexthop_obj_invalid_test()
 	ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
 	check_fail $? "managed to configure a nexthop with MPLS encap when should not"
 
-	ip nexthop add id 1 blackhole
-	check_fail $? "managed to configure a blackhole nexthop when should not"
-
 	ip nexthop add id 1 dev $swp1
 	ip nexthop add id 2 dev $swp1
 	ip nexthop add id 10 group 1/2
@@ -817,6 +815,27 @@ nexthop_obj_group_offload_test()
 	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
 }
 
+nexthop_obj_blackhole_offload_test()
+{
+	# Test offload indication of blackhole nexthop objects
+	RET=0
+
+	ip nexthop add id 1 blackhole
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "Blackhole nexthop not marked as offloaded when should"
+
+	ip nexthop add id 10 group 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "Nexthop group not marked as offloaded when should"
+
+	log_test "blackhole nexthop objects offload indication"
+
+	ip nexthop del id 10
+	ip nexthop del id 1
+}
+
 nexthop_obj_route_offload_test()
 {
 	# Test offload indication of routes using nexthop objects
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 09/10] mlxsw: spectrum_trap: Add blackhole_nexthop trap
From: Ido Schimmel @ 2020-11-23  7:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Register with devlink the blackhole_nexthop trap so that mlxsw will be
able to report packets dropped due to a blackhole nexthop.

The internal trap identifier is "DISCARD_ROUTER3", which traps packets
dropped in the adjacency table.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 8fd7d858f3c8..4ef12e3e021a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -1007,6 +1007,12 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 					     false),
 		},
 	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_NEXTHOP, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ROUTER3, L3_DISCARDS),
+		},
+	},
 };
 
 static struct mlxsw_sp_trap_policer_item *
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 10/10] selftests: mlxsw: Add blackhole_nexthop trap test
From: Ido Schimmel @ 2020-11-23  7:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuba, jiri, dsahern, mlxsw, Ido Schimmel
In-Reply-To: <20201123071230.676469-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Test that packets hitting a blackhole nexthop are trapped to the CPU
when the trap is enabled. Test that packets are not reported when the
trap is disabled.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 .../net/mlxsw/devlink_trap_l3_drops.sh        | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index f5abb1ebd392..4029833f7e27 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -52,6 +52,7 @@ ALL_TESTS="
 	blackhole_route_test
 	irif_disabled_test
 	erif_disabled_test
+	blackhole_nexthop_test
 "
 
 NUM_NETIFS=4
@@ -647,6 +648,41 @@ erif_disabled_test()
 	devlink_trap_action_set $trap_name "drop"
 }
 
+__blackhole_nexthop_test()
+{
+	local flags=$1; shift
+	local subnet=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local trap_name="blackhole_nexthop"
+	local mz_pid
+
+	RET=0
+
+	ip -$flags nexthop add id 1 blackhole
+	ip -$flags route add $subnet nhid 1
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower skip_hw dst_ip $dip ip_proto udp action drop
+
+	# Generate packets to the blackhole nexthop
+	$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+	log_test "Blackhole nexthop: IPv$flags"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+	ip -$flags route del $subnet
+	ip -$flags nexthop del id 1
+}
+
+blackhole_nexthop_test()
+{
+	__blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+	__blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 1/2] dt-bindings: net: nfc: s3fwrn5: Support a UART interface
From: Bongsu Jeon @ 2020-11-23  7:55 UTC (permalink / raw)
  To: krzk@kernel.org, kuba@kernel.org
  Cc: linux-nfc@lists.01.org, netdev@vger.kernel.org,
	devicetree@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <CGME20201123075526epcms2p59410a8ba942f8942f53a593d9df764d0@epcms2p5>

Since S3FWRN82 NFC Chip, The UART interface can be used.
S3FWRN82 supports I2C and UART interface.

Signed-off-by: Bongsu Jeon <bongsu.jeon@samsung.com>
---
 .../bindings/net/nfc/samsung,s3fwrn5.yaml     | 28 +++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
index cb0b8a560282..37b3e5ae5681 100644
--- a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
@@ -13,6 +13,7 @@ maintainers:
 properties:
   compatible:
     const: samsung,s3fwrn5-i2c
+    const: samsung,s3fwrn82-uart
 
   en-gpios:
     maxItems: 1
@@ -47,10 +48,19 @@ additionalProperties: false
 required:
   - compatible
   - en-gpios
-  - interrupts
-  - reg
   - wake-gpios
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: samsung,s3fwrn5-i2c
+    then:
+      required:
+        - interrupts
+        - reg
+
 examples:
   - |
     #include <dt-bindings/gpio/gpio.h>
@@ -71,3 +81,17 @@ examples:
             wake-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>;
         };
     };
+  # UART example on Raspberry Pi
+  - |
+    &uart0 {
+        status = "okay";
+
+        s3fwrn82_uart {
+            compatible = "samsung,s3fwrn82-uart";
+
+            en-gpios = <&gpio 20 0>;
+            wake-gpios = <&gpio 16 0>;
+
+            status = "okay";
+        };
+    };
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 2/2] net: nfc: s3fwrn5: Support a UART interface
From: Bongsu Jeon @ 2020-11-23  7:56 UTC (permalink / raw)
  To: krzk@kernel.org, kuba@kernel.org
  Cc: linux-kernel@vger.kernel.org, linux-nfc@lists.01.org,
	netdev@vger.kernel.org
In-Reply-To: <CGME20201123075658epcms2p5a6237314f7a72a2556545d3f96261c93@epcms2p5>

Since S3FWRN82 NFC Chip, The UART interface can be used.
S3FWRN82 uses NCI protocol and supports I2C and UART interface.

Signed-off-by: Bongsu Jeon <bongsu.jeon@samsung.com>
---
 drivers/nfc/s3fwrn5/Kconfig  |  12 ++
 drivers/nfc/s3fwrn5/Makefile |   2 +
 drivers/nfc/s3fwrn5/uart.c   | 250 +++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+)
 create mode 100644 drivers/nfc/s3fwrn5/uart.c

diff --git a/drivers/nfc/s3fwrn5/Kconfig b/drivers/nfc/s3fwrn5/Kconfig
index 3f8b6da58280..6f88737769e1 100644
--- a/drivers/nfc/s3fwrn5/Kconfig
+++ b/drivers/nfc/s3fwrn5/Kconfig
@@ -20,3 +20,15 @@ config NFC_S3FWRN5_I2C
 	  To compile this driver as a module, choose m here. The module will
 	  be called s3fwrn5_i2c.ko.
 	  Say N if unsure.
+
+config NFC_S3FWRN82_UART
+	tristate "Samsung S3FWRN82 UART support"
+	depends on NFC_NCI && SERIAL_DEV_BUS
+	select NFC_S3FWRN5
+	help
+	  This module adds support for a UART interface to the S3FWRN82 chip.
+	  Select this if your platform is using the UART bus.
+
+	  To compile this driver as a module, choose m here. The module will
+	  be called s3fwrn82_uart.ko.
+	  Say N if unsure.
diff --git a/drivers/nfc/s3fwrn5/Makefile b/drivers/nfc/s3fwrn5/Makefile
index d0ffa35f50e8..d1902102060b 100644
--- a/drivers/nfc/s3fwrn5/Makefile
+++ b/drivers/nfc/s3fwrn5/Makefile
@@ -5,6 +5,8 @@
 
 s3fwrn5-objs = core.o firmware.o nci.o
 s3fwrn5_i2c-objs = i2c.o
+s3fwrn82_uart-objs = uart.o
 
 obj-$(CONFIG_NFC_S3FWRN5) += s3fwrn5.o
 obj-$(CONFIG_NFC_S3FWRN5_I2C) += s3fwrn5_i2c.o
+obj-$(CONFIG_NFC_S3FWRN82_UART) += s3fwrn82_uart.o
diff --git a/drivers/nfc/s3fwrn5/uart.c b/drivers/nfc/s3fwrn5/uart.c
new file mode 100644
index 000000000000..b3c36a5b28d3
--- /dev/null
+++ b/drivers/nfc/s3fwrn5/uart.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * UART Link Layer for S3FWRN82 NCI based Driver
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Author: Bongsu Jeon <bongsu.jeon@samsung.com>
+ * All rights reserved.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nfc.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/serdev.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+
+#include "s3fwrn5.h"
+
+#define S3FWRN82_UART_DRIVER_NAME "s3fwrn82_uart"
+#define S3FWRN82_NCI_HEADER 3
+#define S3FWRN82_NCI_IDX 2
+#define S3FWRN82_EN_WAIT_TIME 20
+#define NCI_SKB_BUFF_LEN 258
+
+struct s3fwrn82_uart_phy {
+	struct serdev_device *ser_dev;
+	struct nci_dev *ndev;
+	struct sk_buff *recv_skb;
+
+	unsigned int gpio_en;
+	unsigned int gpio_fw_wake;
+
+	/* mutex is used to synchronize */
+	struct mutex mutex;
+	enum s3fwrn5_mode mode;
+};
+
+static void s3fwrn82_uart_set_wake(void *phy_id, bool wake)
+{
+	struct s3fwrn82_uart_phy *phy = phy_id;
+
+	mutex_lock(&phy->mutex);
+	gpio_set_value(phy->gpio_fw_wake, wake);
+	msleep(S3FWRN82_EN_WAIT_TIME);
+	mutex_unlock(&phy->mutex);
+}
+
+static void s3fwrn82_uart_set_mode(void *phy_id, enum s3fwrn5_mode mode)
+{
+	struct s3fwrn82_uart_phy *phy = phy_id;
+
+	mutex_lock(&phy->mutex);
+	if (phy->mode == mode)
+		goto out;
+	phy->mode = mode;
+	gpio_set_value(phy->gpio_en, 1);
+	gpio_set_value(phy->gpio_fw_wake, 0);
+	if (mode == S3FWRN5_MODE_FW)
+		gpio_set_value(phy->gpio_fw_wake, 1);
+	if (mode != S3FWRN5_MODE_COLD) {
+		msleep(S3FWRN82_EN_WAIT_TIME);
+		gpio_set_value(phy->gpio_en, 0);
+		msleep(S3FWRN82_EN_WAIT_TIME);
+	}
+out:
+	mutex_unlock(&phy->mutex);
+}
+
+static enum s3fwrn5_mode s3fwrn82_uart_get_mode(void *phy_id)
+{
+	struct s3fwrn82_uart_phy *phy = phy_id;
+	enum s3fwrn5_mode mode;
+
+	mutex_lock(&phy->mutex);
+	mode = phy->mode;
+	mutex_unlock(&phy->mutex);
+	return mode;
+}
+
+static int s3fwrn82_uart_write(void *phy_id, struct sk_buff *out)
+{
+	struct s3fwrn82_uart_phy *phy = phy_id;
+	int err;
+
+	err = serdev_device_write(phy->ser_dev,
+				  out->data, out->len,
+				  MAX_SCHEDULE_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static const struct s3fwrn5_phy_ops uart_phy_ops = {
+	.set_wake = s3fwrn82_uart_set_wake,
+	.set_mode = s3fwrn82_uart_set_mode,
+	.get_mode = s3fwrn82_uart_get_mode,
+	.write = s3fwrn82_uart_write,
+};
+
+static int s3fwrn82_uart_read(struct serdev_device *serdev,
+			      const unsigned char *data,
+			      size_t count)
+{
+	struct s3fwrn82_uart_phy *phy = serdev_device_get_drvdata(serdev);
+	size_t i;
+
+	for (i = 0; i < count; i++) {
+		skb_put_u8(phy->recv_skb, *data++);
+
+		if (phy->recv_skb->len < S3FWRN82_NCI_HEADER)
+			continue;
+
+		if ((phy->recv_skb->len - S3FWRN82_NCI_HEADER)
+				< phy->recv_skb->data[S3FWRN82_NCI_IDX])
+			continue;
+
+		s3fwrn5_recv_frame(phy->ndev, phy->recv_skb, phy->mode);
+		phy->recv_skb = alloc_skb(NCI_SKB_BUFF_LEN, GFP_KERNEL);
+		if (!phy->recv_skb)
+			return 0;
+	}
+
+	return i;
+}
+
+static struct serdev_device_ops s3fwrn82_serdev_ops = {
+	.receive_buf = s3fwrn82_uart_read,
+	.write_wakeup = serdev_device_write_wakeup,
+};
+
+static const struct of_device_id s3fwrn82_uart_of_match[] = {
+	{ .compatible = "samsung,s3fwrn82-uart", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, s3fwrn82_uart_of_match);
+
+static int s3fwrn82_uart_parse_dt(struct serdev_device *serdev)
+{
+	struct s3fwrn82_uart_phy *phy = serdev_device_get_drvdata(serdev);
+	struct device_node *np = serdev->dev.of_node;
+
+	if (!np)
+		return -ENODEV;
+
+	phy->gpio_en = of_get_named_gpio(np, "en-gpios", 0);
+	if (!gpio_is_valid(phy->gpio_en))
+		return -ENODEV;
+
+	phy->gpio_fw_wake = of_get_named_gpio(np, "wake-gpios", 0);
+	if (!gpio_is_valid(phy->gpio_fw_wake))
+		return -ENODEV;
+
+	return 0;
+}
+
+static int s3fwrn82_uart_probe(struct serdev_device *serdev)
+{
+	struct s3fwrn82_uart_phy *phy;
+	int ret = -ENOMEM;
+
+	phy = devm_kzalloc(&serdev->dev, sizeof(*phy), GFP_KERNEL);
+	if (!phy)
+		goto err_exit;
+
+	phy->recv_skb = alloc_skb(NCI_SKB_BUFF_LEN, GFP_KERNEL);
+	if (!phy->recv_skb)
+		goto err_free;
+
+	mutex_init(&phy->mutex);
+	phy->mode = S3FWRN5_MODE_COLD;
+
+	phy->ser_dev = serdev;
+	serdev_device_set_drvdata(serdev, phy);
+	serdev_device_set_client_ops(serdev, &s3fwrn82_serdev_ops);
+	ret = serdev_device_open(serdev);
+	if (ret) {
+		dev_err(&serdev->dev, "Unable to open device\n");
+		goto err_skb;
+	}
+
+	ret = serdev_device_set_baudrate(serdev, 115200);
+	if (ret != 115200) {
+		ret = -EINVAL;
+		goto err_serdev;
+	}
+
+	serdev_device_set_flow_control(serdev, false);
+
+	ret = s3fwrn82_uart_parse_dt(serdev);
+	if (ret < 0)
+		goto err_serdev;
+
+	ret = devm_gpio_request_one(&phy->ser_dev->dev,
+				    phy->gpio_en,
+				    GPIOF_OUT_INIT_HIGH,
+				    "s3fwrn82_en");
+	if (ret < 0)
+		goto err_serdev;
+
+	ret = devm_gpio_request_one(&phy->ser_dev->dev,
+				    phy->gpio_fw_wake,
+				    GPIOF_OUT_INIT_LOW,
+				    "s3fwrn82_fw_wake");
+	if (ret < 0)
+		goto err_serdev;
+
+	ret = s3fwrn5_probe(&phy->ndev, phy, &phy->ser_dev->dev, &uart_phy_ops);
+	if (ret < 0)
+		goto err_serdev;
+
+	return ret;
+
+err_serdev:
+	serdev_device_close(serdev);
+err_skb:
+	kfree_skb(phy->recv_skb);
+err_free:
+	kfree(phy);
+err_exit:
+	return ret;
+}
+
+static void s3fwrn82_uart_remove(struct serdev_device *serdev)
+{
+	struct s3fwrn82_uart_phy *phy = serdev_device_get_drvdata(serdev);
+
+	s3fwrn5_remove(phy->ndev);
+	serdev_device_close(serdev);
+	kfree_skb(phy->recv_skb);
+	kfree(phy);
+}
+
+static struct serdev_device_driver s3fwrn82_uart_driver = {
+	.probe = s3fwrn82_uart_probe,
+	.remove = s3fwrn82_uart_remove,
+	.driver = {
+		.name = S3FWRN82_UART_DRIVER_NAME,
+		.of_match_table = of_match_ptr(s3fwrn82_uart_of_match),
+	},
+};
+
+module_serdev_device_driver(s3fwrn82_uart_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UART driver for Samsung NFC");
+MODULE_AUTHOR("Bongsu Jeon <bongsu.jeon@samsung.com>");
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH net-next 1/2] dt-bindings: net: nfc: s3fwrn5: Support a UART interface
From: krzk @ 2020-11-23  8:01 UTC (permalink / raw)
  To: Bongsu Jeon
  Cc: kuba@kernel.org, linux-nfc@lists.01.org, netdev@vger.kernel.org,
	devicetree@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20201123075526epcms2p59410a8ba942f8942f53a593d9df764d0@epcms2p5>

On Mon, Nov 23, 2020 at 04:55:26PM +0900, Bongsu Jeon wrote:
> Since S3FWRN82 NFC Chip, The UART interface can be used.
> S3FWRN82 supports I2C and UART interface.
> 
> Signed-off-by: Bongsu Jeon <bongsu.jeon@samsung.com>
> ---
>  .../bindings/net/nfc/samsung,s3fwrn5.yaml     | 28 +++++++++++++++++--
>  1 file changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
> index cb0b8a560282..37b3e5ae5681 100644
> --- a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
> +++ b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
> @@ -13,6 +13,7 @@ maintainers:
>  properties:
>    compatible:
>      const: samsung,s3fwrn5-i2c
> +    const: samsung,s3fwrn82-uart

This does not work, you need to use enum. Did you run at least
dt_bindings_check?

The compatible should be just "samsung,s3fwrn82". I think it was a
mistake in the first s3fwrn5 submission to add a interface to
compatible.

>  
>    en-gpios:
>      maxItems: 1
> @@ -47,10 +48,19 @@ additionalProperties: false
>  required:
>    - compatible
>    - en-gpios
> -  - interrupts
> -  - reg
>    - wake-gpios
>  
> +allOf:
> +  - if:
> +      properties:
> +        compatible:
> +          contains:
> +            const: samsung,s3fwrn5-i2c
> +    then:
> +      required:
> +        - interrupts
> +        - reg
> +
>  examples:
>    - |
>      #include <dt-bindings/gpio/gpio.h>
> @@ -71,3 +81,17 @@ examples:
>              wake-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>;
>          };
>      };
> +  # UART example on Raspberry Pi
> +  - |
> +    &uart0 {
> +        status = "okay";
> +
> +        s3fwrn82_uart {

Just "bluetooth" to follow Devicetree specification.

Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH net-next] net: page_pool: Add page_pool_put_page_bulk() to page_pool.rst
From: Ilias Apalodimas @ 2020-11-23  8:06 UTC (permalink / raw)
  To: Lorenzo Bianconi; +Cc: netdev, lorenzo.bianconi, davem, kuba, brouer
In-Reply-To: <937ccc89a82302a06744bcb6d253f0e95651caa2.1605910519.git.lorenzo@kernel.org>

On Fri, Nov 20, 2020 at 11:19:34PM +0100, Lorenzo Bianconi wrote:
> Introduce page_pool_put_page_bulk() entry into the API section of
> page_pool.rst
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  Documentation/networking/page_pool.rst | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst
> index 43088ddf95e4..e848f5b995b8 100644
> --- a/Documentation/networking/page_pool.rst
> +++ b/Documentation/networking/page_pool.rst
> @@ -97,6 +97,14 @@ a page will cause no race conditions is enough.
>  
>  * page_pool_get_dma_dir(): Retrieve the stored DMA direction.
>  
> +* page_pool_put_page_bulk(): It tries to refill a bulk of count pages into the

Tries to refill a number of pages sounds better?

> +  ptr_ring cache holding ptr_ring producer lock. If the ptr_ring is full,
> +  page_pool_put_page_bulk() will release leftover pages to the page allocator.
> +  page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
> +  completion loop for the XDP_REDIRECT use case.
> +  Please consider the caller must not use data area after running

s/consider/note/

> +  page_pool_put_page_bulk(), as this function overwrites it.
> +
>  Coding examples
>  ===============
>  
> -- 
> 2.28.0
> 


Other than that 
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>

^ permalink raw reply

* Re: [PATCH net-next v2] compat: always include linux/compat.h from net/compat.h
From: Christoph Hellwig @ 2020-11-23  8:06 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, hch, arnd
In-Reply-To: <20201121214844.1488283-1-kuba@kernel.org>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply

* Re: [PATCH net] bonding: fix feature flag setting at init time
From: Ivan Vecera @ 2020-11-23  8:19 UTC (permalink / raw)
  To: Jarod Wilson
  Cc: linux-kernel, Jay Vosburgh, Veaceslav Falico, Andy Gospodarek,
	David S. Miller, Jakub Kicinski, Thomas Davis, netdev
In-Reply-To: <20201123031716.6179-1-jarod@redhat.com>

On Sun, 22 Nov 2020 22:17:16 -0500
Jarod Wilson <jarod@redhat.com> wrote:

> Have run into a case where bond_option_mode_set() gets called before
> hw_features has been filled in, and very bad things happen when
> netdev_change_features() then gets called, because the empty hw_features
> wipes out almost all features. Further reading of netdev feature flag
> documentation suggests drivers aren't supposed to touch wanted_features,
> so this changes bond_option_mode_set() to use netdev_increment_features()
> and &= ~BOND_XFRM_FEATURES on mode changes and then only calling
> netdev_features_change() if there was actually a change of features. This
> specifically fixes bonding on top of mlxsw interfaces, and has been
> regression-tested with ixgbe interfaces. This change also simplifies the
> xfrm-specific code in bond_setup() a little bit as well.

Hi Jarod,

the reason is not correct... The problem is not with empty ->hw_features but
with empty ->wanted_features.
During bond device creation bond_newlink() is called. It calls bond_changelink()
first and afterwards register_netdevice(). The problem is that ->wanted_features
are initialized in register_netdevice() so during bond_changlink() call
->wanted_features is 0. So...

bond_newlink()
-> bond_changelink()
   -> __bond_opt_set()
      -> bond_option_mode_set()
         -> netdev_change_features()
            -> __netdev_update_features()
               features = netdev_get_wanted_features()
                          { dev->features & ~dev->hw_features) | dev->wanted_features }

dev->wanted_features is here zero so the rest of the expression clears a bunch of
bits from dev->features...

In case of mlxsw it is important that NETIF_F_HW_VLAN_CTAG_FILTER bit is cleared in
bonding device because in this case vlan_add_rx_filter_info() does not call
bond_vlan_rx_add_vid() so mlxsw_sp_port_add_vid() is not called as well.

Later this causes a WARN in mlxsw_sp_inetaddr_port_vlan_event() because
instance of mlxsw_sp_port_vlan does not exist as mlxsw_sp_port_add_vid() was not
called.

Btw. it should be enough to call existing snippet in bond_option_mode_set() only
when device is already registered?

E.g.:
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 9abfaae1c6f7..ca4913fee5a9 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -768,11 +768,13 @@ static int bond_option_mode_set(struct bonding *bond,
                bond->params.tlb_dynamic_lb = 1;
 
 #ifdef CONFIG_XFRM_OFFLOAD
-       if (newval->value == BOND_MODE_ACTIVEBACKUP)
-               bond->dev->wanted_features |= BOND_XFRM_FEATURES;
-       else
-               bond->dev->wanted_features &= ~BOND_XFRM_FEATURES;
-       netdev_change_features(bond->dev);
+       if (dev->reg_state == NETREG_REGISTERED) {
+               if (newval->value == BOND_MODE_ACTIVEBACKUP)
+                       bond->dev->wanted_features |= BOND_XFRM_FEATURES;
+               else
+                       bond->dev->wanted_features &= ~BOND_XFRM_FEATURES;
+               netdev_change_features(bond->dev);
+       }
 #endif /* CONFIG_XFRM_OFFLOAD */


Thanks,
Ivan


^ permalink raw reply related

* Re: [PATCH net-next 2/2] net: nfc: s3fwrn5: Support a UART interface
From: krzk @ 2020-11-23  8:19 UTC (permalink / raw)
  To: Bongsu Jeon
  Cc: kuba@kernel.org, linux-kernel@vger.kernel.org,
	linux-nfc@lists.01.org, netdev@vger.kernel.org
In-Reply-To: <20201123075658epcms2p5a6237314f7a72a2556545d3f96261c93@epcms2p5>

On Mon, Nov 23, 2020 at 04:56:58PM +0900, Bongsu Jeon wrote:
> Since S3FWRN82 NFC Chip, The UART interface can be used.
> S3FWRN82 uses NCI protocol and supports I2C and UART interface.
> 
> Signed-off-by: Bongsu Jeon <bongsu.jeon@samsung.com>

Please start sending emails properly, e.g. with git send-email, so all
your patches in the patchset are referencing the first patch.

> ---
>  drivers/nfc/s3fwrn5/Kconfig  |  12 ++
>  drivers/nfc/s3fwrn5/Makefile |   2 +
>  drivers/nfc/s3fwrn5/uart.c   | 250 +++++++++++++++++++++++++++++++++++
>  3 files changed, 264 insertions(+)
>  create mode 100644 drivers/nfc/s3fwrn5/uart.c
> 
> diff --git a/drivers/nfc/s3fwrn5/Kconfig b/drivers/nfc/s3fwrn5/Kconfig
> index 3f8b6da58280..6f88737769e1 100644
> --- a/drivers/nfc/s3fwrn5/Kconfig
> +++ b/drivers/nfc/s3fwrn5/Kconfig
> @@ -20,3 +20,15 @@ config NFC_S3FWRN5_I2C
>  	  To compile this driver as a module, choose m here. The module will
>  	  be called s3fwrn5_i2c.ko.
>  	  Say N if unsure.
> +
> +config NFC_S3FWRN82_UART
> +	tristate "Samsung S3FWRN82 UART support"
> +	depends on NFC_NCI && SERIAL_DEV_BUS

What about SERIAL_DEV_BUS as module? Shouldn't this be
SERIAL_DEV_BUS || !SERIAL_DEV_BUS?

> +	select NFC_S3FWRN5
> +	help
> +	  This module adds support for a UART interface to the S3FWRN82 chip.
> +	  Select this if your platform is using the UART bus.
> +
> +	  To compile this driver as a module, choose m here. The module will
> +	  be called s3fwrn82_uart.ko.
> +	  Say N if unsure.
> diff --git a/drivers/nfc/s3fwrn5/Makefile b/drivers/nfc/s3fwrn5/Makefile
> index d0ffa35f50e8..d1902102060b 100644
> --- a/drivers/nfc/s3fwrn5/Makefile
> +++ b/drivers/nfc/s3fwrn5/Makefile
> @@ -5,6 +5,8 @@
>  
>  s3fwrn5-objs = core.o firmware.o nci.o
>  s3fwrn5_i2c-objs = i2c.o
> +s3fwrn82_uart-objs = uart.o
>  
>  obj-$(CONFIG_NFC_S3FWRN5) += s3fwrn5.o
>  obj-$(CONFIG_NFC_S3FWRN5_I2C) += s3fwrn5_i2c.o
> +obj-$(CONFIG_NFC_S3FWRN82_UART) += s3fwrn82_uart.o
> diff --git a/drivers/nfc/s3fwrn5/uart.c b/drivers/nfc/s3fwrn5/uart.c
> new file mode 100644
> index 000000000000..b3c36a5b28d3
> --- /dev/null
> +++ b/drivers/nfc/s3fwrn5/uart.c
> @@ -0,0 +1,250 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * UART Link Layer for S3FWRN82 NCI based Driver
> + *
> + * Copyright (C) 2020 Samsung Electronics
> + * Author: Bongsu Jeon <bongsu.jeon@samsung.com>

You copied a lot from existing i2c.c. Please keep also the original
copyrights.

> + * All rights reserved.
> + */
> +
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/nfc.h>
> +#include <linux/netdevice.h>
> +#include <linux/of.h>
> +#include <linux/serdev.h>
> +#include <linux/gpio.h>
> +#include <linux/of_gpio.h>
> +
> +#include "s3fwrn5.h"
> +
> +#define S3FWRN82_UART_DRIVER_NAME "s3fwrn82_uart"

Remove the define, it is used only once.

> +#define S3FWRN82_NCI_HEADER 3
> +#define S3FWRN82_NCI_IDX 2
> +#define S3FWRN82_EN_WAIT_TIME 20
> +#define NCI_SKB_BUFF_LEN 258
> +
> +struct s3fwrn82_uart_phy {
> +	struct serdev_device *ser_dev;
> +	struct nci_dev *ndev;
> +	struct sk_buff *recv_skb;
> +
> +	unsigned int gpio_en;
> +	unsigned int gpio_fw_wake;
> +
> +	/* mutex is used to synchronize */

Please do not write obvious comments. Mutex is always used to
synchronize, what else is it for? Instead you must describe what exactly
is protected with mutex.

> +	struct mutex mutex;
> +	enum s3fwrn5_mode mode;
> +};
> +
> +static void s3fwrn82_uart_set_wake(void *phy_id, bool wake)
> +{
> +	struct s3fwrn82_uart_phy *phy = phy_id;
> +
> +	mutex_lock(&phy->mutex);
> +	gpio_set_value(phy->gpio_fw_wake, wake);
> +	msleep(S3FWRN82_EN_WAIT_TIME);
> +	mutex_unlock(&phy->mutex);
> +}
> +
> +static void s3fwrn82_uart_set_mode(void *phy_id, enum s3fwrn5_mode mode)
> +{
> +	struct s3fwrn82_uart_phy *phy = phy_id;
> +
> +	mutex_lock(&phy->mutex);
> +	if (phy->mode == mode)
> +		goto out;
> +	phy->mode = mode;
> +	gpio_set_value(phy->gpio_en, 1);
> +	gpio_set_value(phy->gpio_fw_wake, 0);
> +	if (mode == S3FWRN5_MODE_FW)
> +		gpio_set_value(phy->gpio_fw_wake, 1);
> +	if (mode != S3FWRN5_MODE_COLD) {
> +		msleep(S3FWRN82_EN_WAIT_TIME);
> +		gpio_set_value(phy->gpio_en, 0);
> +		msleep(S3FWRN82_EN_WAIT_TIME);
> +	}
> +out:
> +	mutex_unlock(&phy->mutex);
> +}
> +
> +static enum s3fwrn5_mode s3fwrn82_uart_get_mode(void *phy_id)
> +{
> +	struct s3fwrn82_uart_phy *phy = phy_id;
> +	enum s3fwrn5_mode mode;
> +
> +	mutex_lock(&phy->mutex);
> +	mode = phy->mode;
> +	mutex_unlock(&phy->mutex);
> +	return mode;
> +}

All this duplicates I2C version. You need to start either reusing common
blocks.

> +
> +static int s3fwrn82_uart_write(void *phy_id, struct sk_buff *out)
> +{
> +	struct s3fwrn82_uart_phy *phy = phy_id;
> +	int err;
> +
> +	err = serdev_device_write(phy->ser_dev,
> +				  out->data, out->len,
> +				  MAX_SCHEDULE_TIMEOUT);
> +	if (err < 0)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static const struct s3fwrn5_phy_ops uart_phy_ops = {
> +	.set_wake = s3fwrn82_uart_set_wake,
> +	.set_mode = s3fwrn82_uart_set_mode,
> +	.get_mode = s3fwrn82_uart_get_mode,
> +	.write = s3fwrn82_uart_write,
> +};
> +
> +static int s3fwrn82_uart_read(struct serdev_device *serdev,
> +			      const unsigned char *data,
> +			      size_t count)
> +{
> +	struct s3fwrn82_uart_phy *phy = serdev_device_get_drvdata(serdev);
> +	size_t i;
> +
> +	for (i = 0; i < count; i++) {
> +		skb_put_u8(phy->recv_skb, *data++);
> +
> +		if (phy->recv_skb->len < S3FWRN82_NCI_HEADER)
> +			continue;
> +
> +		if ((phy->recv_skb->len - S3FWRN82_NCI_HEADER)
> +				< phy->recv_skb->data[S3FWRN82_NCI_IDX])
> +			continue;
> +
> +		s3fwrn5_recv_frame(phy->ndev, phy->recv_skb, phy->mode);
> +		phy->recv_skb = alloc_skb(NCI_SKB_BUFF_LEN, GFP_KERNEL);
> +		if (!phy->recv_skb)
> +			return 0;
> +	}
> +
> +	return i;
> +}
> +
> +static struct serdev_device_ops s3fwrn82_serdev_ops = {

const

> +	.receive_buf = s3fwrn82_uart_read,
> +	.write_wakeup = serdev_device_write_wakeup,
> +};
> +
> +static const struct of_device_id s3fwrn82_uart_of_match[] = {
> +	{ .compatible = "samsung,s3fwrn82-uart", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, s3fwrn82_uart_of_match);
> +
> +static int s3fwrn82_uart_parse_dt(struct serdev_device *serdev)
> +{
> +	struct s3fwrn82_uart_phy *phy = serdev_device_get_drvdata(serdev);
> +	struct device_node *np = serdev->dev.of_node;
> +
> +	if (!np)
> +		return -ENODEV;
> +
> +	phy->gpio_en = of_get_named_gpio(np, "en-gpios", 0);
> +	if (!gpio_is_valid(phy->gpio_en))
> +		return -ENODEV;
> +
> +	phy->gpio_fw_wake = of_get_named_gpio(np, "wake-gpios", 0);

You should not cast it it unsigned int. I'll fix the s3fwrn5 from which
you copied this apparently.

> +	if (!gpio_is_valid(phy->gpio_fw_wake))
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int s3fwrn82_uart_probe(struct serdev_device *serdev)
> +{
> +	struct s3fwrn82_uart_phy *phy;
> +	int ret = -ENOMEM;
> +
> +	phy = devm_kzalloc(&serdev->dev, sizeof(*phy), GFP_KERNEL);
> +	if (!phy)
> +		goto err_exit;
> +
> +	phy->recv_skb = alloc_skb(NCI_SKB_BUFF_LEN, GFP_KERNEL);
> +	if (!phy->recv_skb)
> +		goto err_free;
> +
> +	mutex_init(&phy->mutex);
> +	phy->mode = S3FWRN5_MODE_COLD;
> +
> +	phy->ser_dev = serdev;
> +	serdev_device_set_drvdata(serdev, phy);
> +	serdev_device_set_client_ops(serdev, &s3fwrn82_serdev_ops);
> +	ret = serdev_device_open(serdev);
> +	if (ret) {
> +		dev_err(&serdev->dev, "Unable to open device\n");
> +		goto err_skb;
> +	}
> +
> +	ret = serdev_device_set_baudrate(serdev, 115200);

Why baudrate is fixed?

> +	if (ret != 115200) {
> +		ret = -EINVAL;
> +		goto err_serdev;
> +	}
> +
> +	serdev_device_set_flow_control(serdev, false);
> +
> +	ret = s3fwrn82_uart_parse_dt(serdev);
> +	if (ret < 0)
> +		goto err_serdev;
> +
> +	ret = devm_gpio_request_one(&phy->ser_dev->dev,
> +				    phy->gpio_en,
> +				    GPIOF_OUT_INIT_HIGH,
> +				    "s3fwrn82_en");

This is weirdly wrapped.

> +	if (ret < 0)
> +		goto err_serdev;
> +
> +	ret = devm_gpio_request_one(&phy->ser_dev->dev,
> +				    phy->gpio_fw_wake,
> +				    GPIOF_OUT_INIT_LOW,
> +				    "s3fwrn82_fw_wake");
> +	if (ret < 0)
> +		goto err_serdev;
> +
> +	ret = s3fwrn5_probe(&phy->ndev, phy, &phy->ser_dev->dev, &uart_phy_ops);
> +	if (ret < 0)
> +		goto err_serdev;
> +
> +	return ret;
> +
> +err_serdev:
> +	serdev_device_close(serdev);
> +err_skb:
> +	kfree_skb(phy->recv_skb);
> +err_free:
> +	kfree(phy);

Eee.... why? Did you test this code?

> +err_exit:
> +	return ret;
> +}
> +
> +static void s3fwrn82_uart_remove(struct serdev_device *serdev)
> +{
> +	struct s3fwrn82_uart_phy *phy = serdev_device_get_drvdata(serdev);
> +
> +	s3fwrn5_remove(phy->ndev);
> +	serdev_device_close(serdev);
> +	kfree_skb(phy->recv_skb);
> +	kfree(phy);

This does not look like tested...

Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH bpf] xsk: fix incorrect netdev reference count
From: Magnus Karlsson @ 2020-11-23  8:28 UTC (permalink / raw)
  To: Marek Majtyka
  Cc: Karlsson, Magnus, Björn Töpel, Alexei Starovoitov,
	Daniel Borkmann, Network Development, Jonathan Lemon,
	Marek Majtyka, bpf
In-Reply-To: <20201120151443.105903-1-marekx.majtyka@intel.com>

On Fri, Nov 20, 2020 at 4:17 PM <alardam@gmail.com> wrote:
>
> From: Marek Majtyka <marekx.majtyka@intel.com>
>
> Fix incorrect netdev reference count in xsk_bind operation. Incorrect
> reference count of the device appears when a user calls bind with the
> XDP_ZEROCOPY flag on an interface which does not support zero-copy.
> In such a case, an error is returned but the reference count is not
> decreased. This change fixes the fault, by decreasing the reference count
> in case of such an error.
>
> The problem being corrected appeared in '162c820ed896' for the first time,
> and the code was moved to new file location over the time with commit
> 'c2d3d6a47462'. This specific patch applies to all version starting
> from 'c2d3d6a47462'. The same solution should be applied but on different
> file (net/xdp/xdp_umem.c) and function (xdp_umem_assign_dev) for versions
> from '162c820ed896' to 'c2d3d6a47462' excluded.
>
> Fixes: 162c820ed896 ("xdp: hold device for umem regardless of zero- ...")
> Signed-off-by: Marek Majtyka <marekx.majtyka@intel.com>
> ---
>  net/xdp/xsk_buff_pool.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> index 8a3bf4e1318e..46d09bfb1923 100644
> --- a/net/xdp/xsk_buff_pool.c
> +++ b/net/xdp/xsk_buff_pool.c
> @@ -185,8 +185,10 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool,
>  err_unreg_pool:
>         if (!force_zc)
>                 err = 0; /* fallback to copy mode */
> -       if (err)
> +       if (err) {
>                 xsk_clear_pool_at_qid(netdev, queue_id);
> +               dev_put(netdev);
> +       }
>         return err;
>  }

Thank you Marek for spotting and fixing this!

Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>

> --
> 2.27.0
>

^ permalink raw reply

* Re: [PATCH net-next v4 2/5] net/lapb: support netdev events
From: Xie He @ 2020-11-23  8:31 UTC (permalink / raw)
  To: Martin Schiller
  Cc: Andrew Hendry, David S. Miller, Jakub Kicinski, Linux X25,
	Linux Kernel Network Developers, LKML
In-Reply-To: <87a620b6a55ea8386bffefca0a1f8b77@dev.tdt.de>

On Sun, Nov 22, 2020 at 10:55 PM Martin Schiller <ms@dev.tdt.de> wrote:
>
> No, they aren't independent. The carrier can only be up if the device /
> interface is UP. And as far as I can see a NETDEV_CHANGE event will also
> only be generated on interfaces that are UP.
>
> So you can be sure, that if there is a NETDEV_CHANGE event then the
> device is UP.

OK. Thanks for your explanation!

> I removed the NETDEV_UP handling because I don't think it makes sense
> to implicitly try to establish layer2 (LAPB) if there is no carrier.

As I understand, when the device goes up, the carrier can be either
down or up. Right?

If this is true, when a device goes up and the carrier then goes up
after that, L2 will automatically connect, but if a device goes up and
the carrier is already up, L2 will not automatically connect. I think
it might be better to eliminate this difference in handling. It might
be better to make it automatically connect in both situations, or in
neither situations.

If you want to go with the second way (auto connect in neither
situations), the next (3rd) patch of this series might be also not
needed.

I just want to make the behavior of LAPB more consistent. I think we
should either make LAPB auto-connect in all situations, or make LAPB
wait for L3's instruction to connect in all situations.

> And with the first X.25 connection request on that interface, it will
> be established anyway by x25_transmit_link().
>
> I've tested it here with an HDLC WAN Adapter and it works as expected.
>
> These are also the ideal conditions for the already mentioned "on
> demand" scenario. The only necessary change would be to call
> x25_terminate_link() on an interface after clearing the last X.25
> session.
>
> > On NETDEV_GOING_DOWN, we can also check the carrier status first and
> > if it is down, we don't need to call lapb_disconnect_request.
>
> This is not necessary because lapb_disconnect_request() checks the
> current state. And if the carrier is DOWN then the state should also be
> LAPB_STATE_0 and so lapb_disconnect_request() does nothing.

Yes, I understand. I just thought adding this check might make the
code cleaner. But you are right.

^ permalink raw reply

* Re: [PATCH net-next] net: page_pool: Add page_pool_put_page_bulk() to page_pool.rst
From: Lorenzo Bianconi @ 2020-11-23  8:54 UTC (permalink / raw)
  To: Ilias Apalodimas; +Cc: Lorenzo Bianconi, netdev, davem, kuba, brouer
In-Reply-To: <X7tteeAxhH9G0TwF@apalos.home>

[-- Attachment #1: Type: text/plain, Size: 1599 bytes --]

> On Fri, Nov 20, 2020 at 11:19:34PM +0100, Lorenzo Bianconi wrote:
> > Introduce page_pool_put_page_bulk() entry into the API section of
> > page_pool.rst
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  Documentation/networking/page_pool.rst | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst
> > index 43088ddf95e4..e848f5b995b8 100644
> > --- a/Documentation/networking/page_pool.rst
> > +++ b/Documentation/networking/page_pool.rst
> > @@ -97,6 +97,14 @@ a page will cause no race conditions is enough.
> >  
> >  * page_pool_get_dma_dir(): Retrieve the stored DMA direction.
> >  
> > +* page_pool_put_page_bulk(): It tries to refill a bulk of count pages into the
> 
> Tries to refill a number of pages sounds better?

ack, will fix it in v2

> 
> > +  ptr_ring cache holding ptr_ring producer lock. If the ptr_ring is full,
> > +  page_pool_put_page_bulk() will release leftover pages to the page allocator.
> > +  page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
> > +  completion loop for the XDP_REDIRECT use case.
> > +  Please consider the caller must not use data area after running
> 
> s/consider/note/

ack, will fix it in v2

Regards,
Lorenzo

> 
> > +  page_pool_put_page_bulk(), as this function overwrites it.
> > +
> >  Coding examples
> >  ===============
> >  
> > -- 
> > 2.28.0
> > 
> 
> 
> Other than that 
> Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply

* Re: [PATCH net-next v4 2/5] net/lapb: support netdev events
From: Martin Schiller @ 2020-11-23  9:00 UTC (permalink / raw)
  To: Xie He
  Cc: Andrew Hendry, David S. Miller, Jakub Kicinski, Linux X25,
	Linux Kernel Network Developers, LKML
In-Reply-To: <CAJht_EPc8MF1TjznSjWTPyMbsrw3JVqxST5g=eF0yf_zasUdeA@mail.gmail.com>

On 2020-11-23 09:31, Xie He wrote:
> On Sun, Nov 22, 2020 at 10:55 PM Martin Schiller <ms@dev.tdt.de> wrote:
>> 
>> No, they aren't independent. The carrier can only be up if the device 
>> /
>> interface is UP. And as far as I can see a NETDEV_CHANGE event will 
>> also
>> only be generated on interfaces that are UP.
>> 
>> So you can be sure, that if there is a NETDEV_CHANGE event then the
>> device is UP.
> 
> OK. Thanks for your explanation!
> 
>> I removed the NETDEV_UP handling because I don't think it makes sense
>> to implicitly try to establish layer2 (LAPB) if there is no carrier.
> 
> As I understand, when the device goes up, the carrier can be either
> down or up. Right?
> 
> If this is true, when a device goes up and the carrier then goes up
> after that, L2 will automatically connect, but if a device goes up and
> the carrier is already up, L2 will not automatically connect. I think
> it might be better to eliminate this difference in handling. It might
> be better to make it automatically connect in both situations, or in
> neither situations.

AFAIK the carrier can't be up before the device is up. Therefore, there
will be a NETDEV_CHANGE event after the NETDEV_UP event.

This is what I can see in my tests (with the HDLC interface).

Is the behaviour different for e.g. lapbether?

> 
> If you want to go with the second way (auto connect in neither
> situations), the next (3rd) patch of this series might be also not
> needed.
> 
> I just want to make the behavior of LAPB more consistent. I think we
> should either make LAPB auto-connect in all situations, or make LAPB
> wait for L3's instruction to connect in all situations.
> 
>> And with the first X.25 connection request on that interface, it will
>> be established anyway by x25_transmit_link().
>> 
>> I've tested it here with an HDLC WAN Adapter and it works as expected.
>> 
>> These are also the ideal conditions for the already mentioned "on
>> demand" scenario. The only necessary change would be to call
>> x25_terminate_link() on an interface after clearing the last X.25
>> session.
>> 
>> > On NETDEV_GOING_DOWN, we can also check the carrier status first and
>> > if it is down, we don't need to call lapb_disconnect_request.
>> 
>> This is not necessary because lapb_disconnect_request() checks the
>> current state. And if the carrier is DOWN then the state should also 
>> be
>> LAPB_STATE_0 and so lapb_disconnect_request() does nothing.
> 
> Yes, I understand. I just thought adding this check might make the
> code cleaner. But you are right.

^ permalink raw reply

* [PATCH v4] dt-bindings: misc: convert fsl,qoriq-mc from txt to YAML
From: Laurentiu Tudor @ 2020-11-23  9:00 UTC (permalink / raw)
  To: robh+dt, leoyang.li, corbet, linux-arm-kernel, devicetree,
	linux-kernel, netdev, linux-doc
  Cc: davem, kuba, linuxppc-dev, ioana.ciornei, Ionut-robert Aron,
	Laurentiu Tudor

From: Ionut-robert Aron <ionut-robert.aron@nxp.com>

Convert fsl,qoriq-mc to YAML in order to automate the verification
process of dts files. In addition, update MAINTAINERS accordingly
and, while at it, add some missing files.

Signed-off-by: Ionut-robert Aron <ionut-robert.aron@nxp.com>
[laurentiu.tudor@nxp.com: update MINTAINERS, updates & fixes in schema]
Signed-off-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
---
Changes in v4:
 - use $ref to point to fsl,qoriq-mc-dpmac binding

Changes in v3:
 - dropped duplicated "fsl,qoriq-mc-dpmac" schema and replaced with
   reference to it
 - fixed a dt_binding_check warning

Changes in v2:
 - fixed errors reported by yamllint
 - dropped multiple unnecessary quotes
 - used schema instead of text in description
 - added constraints on dpmac reg property

 .../devicetree/bindings/misc/fsl,qoriq-mc.txt | 196 ------------------
 .../bindings/misc/fsl,qoriq-mc.yaml           | 186 +++++++++++++++++
 .../ethernet/freescale/dpaa2/overview.rst     |   5 +-
 MAINTAINERS                                   |   4 +-
 4 files changed, 193 insertions(+), 198 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
 create mode 100644 Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml

diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
deleted file mode 100644
index 7b486d4985dc..000000000000
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ /dev/null
@@ -1,196 +0,0 @@
-* Freescale Management Complex
-
-The Freescale Management Complex (fsl-mc) is a hardware resource
-manager that manages specialized hardware objects used in
-network-oriented packet processing applications. After the fsl-mc
-block is enabled, pools of hardware resources are available, such as
-queues, buffer pools, I/O interfaces. These resources are building
-blocks that can be used to create functional hardware objects/devices
-such as network interfaces, crypto accelerator instances, L2 switches,
-etc.
-
-For an overview of the DPAA2 architecture and fsl-mc bus see:
-Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
-
-As described in the above overview, all DPAA2 objects in a DPRC share the
-same hardware "isolation context" and a 10-bit value called an ICID
-(isolation context id) is expressed by the hardware to identify
-the requester.
-
-The generic 'iommus' property is insufficient to describe the relationship
-between ICIDs and IOMMUs, so an iommu-map property is used to define
-the set of possible ICIDs under a root DPRC and how they map to
-an IOMMU.
-
-For generic IOMMU bindings, see
-Documentation/devicetree/bindings/iommu/iommu.txt.
-
-For arm-smmu binding, see:
-Documentation/devicetree/bindings/iommu/arm,smmu.yaml.
-
-The MSI writes are accompanied by sideband data which is derived from the ICID.
-The msi-map property is used to associate the devices with both the ITS
-controller and the sideband data which accompanies the writes.
-
-For generic MSI bindings, see
-Documentation/devicetree/bindings/interrupt-controller/msi.txt.
-
-For GICv3 and GIC ITS bindings, see:
-Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml.
-
-Required properties:
-
-    - compatible
-        Value type: <string>
-        Definition: Must be "fsl,qoriq-mc".  A Freescale Management Complex
-                    compatible with this binding must have Block Revision
-                    Registers BRR1 and BRR2 at offset 0x0BF8 and 0x0BFC in
-                    the MC control register region.
-
-    - reg
-        Value type: <prop-encoded-array>
-        Definition: A standard property.  Specifies one or two regions
-                    defining the MC's registers:
-
-                       -the first region is the command portal for the
-                        this machine and must always be present
-
-                       -the second region is the MC control registers. This
-                        region may not be present in some scenarios, such
-                        as in the device tree presented to a virtual machine.
-
-    - ranges
-        Value type: <prop-encoded-array>
-        Definition: A standard property.  Defines the mapping between the child
-                    MC address space and the parent system address space.
-
-                    The MC address space is defined by 3 components:
-                       <region type> <offset hi> <offset lo>
-
-                    Valid values for region type are
-                       0x0 - MC portals
-                       0x1 - QBMAN portals
-
-    - #address-cells
-        Value type: <u32>
-        Definition: Must be 3.  (see definition in 'ranges' property)
-
-    - #size-cells
-        Value type: <u32>
-        Definition: Must be 1.
-
-Sub-nodes:
-
-        The fsl-mc node may optionally have dpmac sub-nodes that describe
-        the relationship between the Ethernet MACs which belong to the MC
-        and the Ethernet PHYs on the system board.
-
-        The dpmac nodes must be under a node named "dpmacs" which contains
-        the following properties:
-
-            - #address-cells
-              Value type: <u32>
-              Definition: Must be present if dpmac sub-nodes are defined and must
-                          have a value of 1.
-
-            - #size-cells
-              Value type: <u32>
-              Definition: Must be present if dpmac sub-nodes are defined and must
-                          have a value of 0.
-
-        These nodes must have the following properties:
-
-            - compatible
-              Value type: <string>
-              Definition: Must be "fsl,qoriq-mc-dpmac".
-
-            - reg
-              Value type: <prop-encoded-array>
-              Definition: Specifies the id of the dpmac.
-
-            - phy-handle
-              Value type: <phandle>
-              Definition: Specifies the phandle to the PHY device node associated
-                          with the this dpmac.
-Optional properties:
-
-- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier
-  data.
-
-  The property is an arbitrary number of tuples of
-  (icid-base,iommu,iommu-base,length).
-
-  Any ICID i in the interval [icid-base, icid-base + length) is
-  associated with the listed IOMMU, with the iommu-specifier
-  (i - icid-base + iommu-base).
-
-- msi-map: Maps an ICID to a GIC ITS and associated msi-specifier
-  data.
-
-  The property is an arbitrary number of tuples of
-  (icid-base,gic-its,msi-base,length).
-
-  Any ICID in the interval [icid-base, icid-base + length) is
-  associated with the listed GIC ITS, with the msi-specifier
-  (i - icid-base + msi-base).
-
-Deprecated properties:
-
-    - msi-parent
-        Value type: <phandle>
-        Definition: Describes the MSI controller node handling message
-                    interrupts for the MC. When there is no translation
-                    between the ICID and deviceID this property can be used
-                    to describe the MSI controller used by the devices on the
-                    mc-bus.
-                    The use of this property for mc-bus is deprecated. Please
-                    use msi-map.
-
-Example:
-
-        smmu: iommu@5000000 {
-               compatible = "arm,mmu-500";
-               #iommu-cells = <1>;
-               stream-match-mask = <0x7C00>;
-               ...
-        };
-
-        gic: interrupt-controller@6000000 {
-               compatible = "arm,gic-v3";
-               ...
-        }
-        its: gic-its@6020000 {
-               compatible = "arm,gic-v3-its";
-               msi-controller;
-               ...
-        };
-
-        fsl_mc: fsl-mc@80c000000 {
-                compatible = "fsl,qoriq-mc";
-                reg = <0x00000008 0x0c000000 0 0x40>,    /* MC portal base */
-                      <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
-                /* define map for ICIDs 23-64 */
-                iommu-map = <23 &smmu 23 41>;
-                /* define msi map for ICIDs 23-64 */
-                msi-map = <23 &its 23 41>;
-                #address-cells = <3>;
-                #size-cells = <1>;
-
-                /*
-                 * Region type 0x0 - MC portals
-                 * Region type 0x1 - QBMAN portals
-                 */
-                ranges = <0x0 0x0 0x0 0x8 0x0c000000 0x4000000
-                          0x1 0x0 0x0 0x8 0x18000000 0x8000000>;
-
-                dpmacs {
-                    #address-cells = <1>;
-                    #size-cells = <0>;
-
-                    dpmac@1 {
-                        compatible = "fsl,qoriq-mc-dpmac";
-                        reg = <1>;
-                        phy-handle = <&mdio0_phy0>;
-                    }
-                }
-        };
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml
new file mode 100644
index 000000000000..f45e21872e4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2020 NXP
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/misc/fsl,qoriq-mc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+maintainers:
+  - Laurentiu Tudor <laurentiu.tudor@nxp.com>
+
+title: Freescale Management Complex
+
+description: |
+  The Freescale Management Complex (fsl-mc) is a hardware resource
+  manager that manages specialized hardware objects used in
+  network-oriented packet processing applications. After the fsl-mc
+  block is enabled, pools of hardware resources are available, such as
+  queues, buffer pools, I/O interfaces. These resources are building
+  blocks that can be used to create functional hardware objects/devices
+  such as network interfaces, crypto accelerator instances, L2 switches,
+  etc.
+
+  For an overview of the DPAA2 architecture and fsl-mc bus see:
+  Documentation/networking/device_drivers/freescale/dpaa2/overview.rst
+
+  As described in the above overview, all DPAA2 objects in a DPRC share the
+  same hardware "isolation context" and a 10-bit value called an ICID
+  (isolation context id) is expressed by the hardware to identify
+  the requester.
+
+  The generic 'iommus' property is insufficient to describe the relationship
+  between ICIDs and IOMMUs, so an iommu-map property is used to define
+  the set of possible ICIDs under a root DPRC and how they map to
+  an IOMMU.
+
+  For generic IOMMU bindings, see:
+  Documentation/devicetree/bindings/iommu/iommu.txt.
+
+  For arm-smmu binding, see:
+  Documentation/devicetree/bindings/iommu/arm,smmu.yaml.
+
+  MC firmware binary images can be found here:
+  https://github.com/NXP/qoriq-mc-binary
+
+properties:
+  compatible:
+    const: fsl,qoriq-mc
+    description:
+      A Freescale Management Complex compatible with this binding must have
+      Block Revision Registers BRR1 and BRR2 at offset 0x0BF8 and 0x0BFC in
+      the MC control register region.
+
+  reg:
+    minItems: 1
+    items:
+      - description: the command portal for this machine
+      - description:
+          MC control registers. This region may not be present in some
+          scenarios, such as in the device tree presented to a virtual
+          machine.
+
+  ranges:
+    description: |
+      A standard property. Defines the mapping between the child MC address
+      space and the parent system address space.
+
+      The MC address space is defined by 3 components:
+                <region type> <offset hi> <offset lo>
+
+      Valid values for region type are:
+                  0x0 - MC portals
+                  0x1 - QBMAN portals
+
+  '#address-cells':
+    const: 3
+
+  '#size-cells':
+    const: 1
+
+  dpmacs:
+    type: object
+    description:
+      The fsl-mc node may optionally have dpmac sub-nodes that describe the
+      relationship between the Ethernet MACs which belong to the MC and the
+      Ethernet PHYs on the system board.
+
+    properties:
+      '#address-cells':
+        const: 1
+
+      '#size-cells':
+        const: 0
+
+    patternProperties:
+      "^(dpmac@[0-9a-f]+)|(ethernet@[0-9a-f]+)$":
+        type: object
+
+        $ref: /schemas/net/fsl,qoriq-mc-dpmac.yaml#
+
+  iommu-map:
+    description: |
+      Maps an ICID to an IOMMU and associated iommu-specifier data.
+
+      The property is an arbitrary number of tuples of
+      (icid-base, iommu, iommu-base, length).
+
+      Any ICID i in the interval [icid-base, icid-base + length) is
+      associated with the listed IOMMU, with the iommu-specifier
+      (i - icid-base + iommu-base).
+
+  msi-map:
+    description: |
+      Maps an ICID to a GIC ITS and associated msi-specifier data.
+
+      The property is an arbitrary number of tuples of
+      (icid-base, gic-its, msi-base, length).
+
+      Any ICID in the interval [icid-base, icid-base + length) is
+      associated with the listed GIC ITS, with the msi-specifier
+      (i - icid-base + msi-base).
+
+  msi-parent:
+    deprecated: true
+    description:
+      Points to the MSI controller node handling message interrupts for the MC.
+
+required:
+  - compatible
+  - reg
+  - iommu-map
+  - msi-map
+  - ranges
+  - '#address-cells'
+  - '#size-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+
+      smmu: iommu@5000000 {
+        compatible = "arm,mmu-500";
+        #global-interrupts = <1>;
+        #iommu-cells = <1>;
+        reg = <0 0x5000000 0 0x800000>;
+        stream-match-mask = <0x7c00>;
+        interrupts = <0 13 4>,
+                     <0 146 4>, <0 147 4>,
+                     <0 148 4>, <0 149 4>,
+                     <0 150 4>, <0 151 4>,
+                     <0 152 4>, <0 153 4>;
+      };
+
+      fsl_mc: fsl-mc@80c000000 {
+        compatible = "fsl,qoriq-mc";
+        reg = <0x00000008 0x0c000000 0 0x40>,    /* MC portal base */
+        <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
+        /* define map for ICIDs 23-64 */
+        iommu-map = <23 &smmu 23 41>;
+        /* define msi map for ICIDs 23-64 */
+        msi-map = <23 &its 23 41>;
+        #address-cells = <3>;
+        #size-cells = <1>;
+
+        /*
+        * Region type 0x0 - MC portals
+        * Region type 0x1 - QBMAN portals
+        */
+        ranges = <0x0 0x0 0x0 0x8 0x0c000000 0x4000000
+                  0x1 0x0 0x0 0x8 0x18000000 0x8000000>;
+
+        dpmacs {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          ethernet@1 {
+            compatible = "fsl,qoriq-mc-dpmac";
+            reg = <1>;
+            phy-handle = <&mdio0_phy0>;
+          };
+        };
+      };
+    };
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
index d638b5a8aadd..b3261c5871cc 100644
--- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
+++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
@@ -28,6 +28,9 @@ interfaces, an L2 switch, or accelerator instances.
 The MC provides memory-mapped I/O command interfaces (MC portals)
 which DPAA2 software drivers use to operate on DPAA2 objects.
 
+MC firmware binary images can be found here:
+https://github.com/NXP/qoriq-mc-binary
+
 The diagram below shows an overview of the DPAA2 resource management
 architecture::
 
@@ -338,7 +341,7 @@ Key functions include:
   a bind of the root DPRC to the DPRC driver
 
 The binding for the MC-bus device-tree node can be consulted at
-*Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt*.
+*Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml*.
 The sysfs bind/unbind interfaces for the MC-bus can be consulted at
 *Documentation/ABI/testing/sysfs-bus-fsl-mc*.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index b516bb34a8d5..e0ce6e2b663c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14409,9 +14409,11 @@ M:	Stuart Yoder <stuyoder@gmail.com>
 M:	Laurentiu Tudor <laurentiu.tudor@nxp.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+F:	Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
+F:	Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml
 F:	Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
 F:	drivers/bus/fsl-mc/
+F:	include/linux/fsl/mc.h
 
 QT1010 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH net-next 3/3] net: phy: mscc: use new PTP_MSGTYPE_* defines
From: Antoine Tenart @ 2020-11-23  9:05 UTC (permalink / raw)
  To: Andrew Lunn, Christian Eggers, Heiner Kallweit, Jakub Kicinski,
	Richard Cochran
  Cc: Vladimir Oltean, Russell King, David S . Miller, netdev,
	linux-kernel, Christian Eggers, Christian Eggers, Quentin Schulz,
	Antoine Tenart
In-Reply-To: <20201122082636.12451-4-ceggers@arri.de>

Hello Christian,

Quoting Christian Eggers (2020-11-22 09:26:36)
> Use recently introduced PTP_MSGTYPE_SYNC and PTP_MSGTYPE_DELAY_REQ
> defines instead of a driver internal enumeration.
> 
> Signed-off-by: Christian Eggers <ceggers@gmx.de>

Reviewed-by: Antoine Tenart <atenart@kernel.org>

Thanks!
Antoine

> Cc: Quentin Schulz <quentin.schulz@bootlin.com>
> Cc: Antoine Tenart <atenart@kernel.org>
> Cc: Antoine Tenart <antoine.tenart@bootlin.com>
> ---
>  drivers/net/phy/mscc/mscc_ptp.c | 14 +++++++-------
>  drivers/net/phy/mscc/mscc_ptp.h |  5 -----
>  2 files changed, 7 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c
> index d8a61456d1ce..924ed5b034a4 100644
> --- a/drivers/net/phy/mscc/mscc_ptp.c
> +++ b/drivers/net/phy/mscc/mscc_ptp.c
> @@ -506,9 +506,9 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk)
>  {
>         struct vsc8531_private *vsc8531 = phydev->priv;
>         bool base = phydev->mdio.addr == vsc8531->ts_base_addr;
> -       enum vsc85xx_ptp_msg_type msgs[] = {
> -               PTP_MSG_TYPE_SYNC,
> -               PTP_MSG_TYPE_DELAY_REQ
> +       u8 msgs[] = {
> +               PTP_MSGTYPE_SYNC,
> +               PTP_MSGTYPE_DELAY_REQ
>         };
>         u32 val;
>         u8 i;
> @@ -847,9 +847,9 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk
>  static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
>                             bool one_step, bool enable)
>  {
> -       enum vsc85xx_ptp_msg_type msgs[] = {
> -               PTP_MSG_TYPE_SYNC,
> -               PTP_MSG_TYPE_DELAY_REQ
> +       u8 msgs[] = {
> +               PTP_MSGTYPE_SYNC,
> +               PTP_MSGTYPE_DELAY_REQ
>         };
>         u32 val;
>         u8 i;
> @@ -858,7 +858,7 @@ static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
>                 if (blk == INGRESS)
>                         vsc85xx_ts_ptp_action_flow(phydev, blk, msgs[i],
>                                                    PTP_WRITE_NS);
> -               else if (msgs[i] == PTP_MSG_TYPE_SYNC && one_step)
> +               else if (msgs[i] == PTP_MSGTYPE_SYNC && one_step)
>                         /* no need to know Sync t when sending in one_step */
>                         vsc85xx_ts_ptp_action_flow(phydev, blk, msgs[i],
>                                                    PTP_WRITE_1588);
> diff --git a/drivers/net/phy/mscc/mscc_ptp.h b/drivers/net/phy/mscc/mscc_ptp.h
> index 3ea163af0f4f..da3465360e90 100644
> --- a/drivers/net/phy/mscc/mscc_ptp.h
> +++ b/drivers/net/phy/mscc/mscc_ptp.h
> @@ -436,11 +436,6 @@ enum ptp_cmd {
>         PTP_SAVE_IN_TS_FIFO = 11, /* invalid when writing in reg */
>  };
>  
> -enum vsc85xx_ptp_msg_type {
> -       PTP_MSG_TYPE_SYNC,
> -       PTP_MSG_TYPE_DELAY_REQ,
> -};
> -
>  struct vsc85xx_ptphdr {
>         u8 tsmt; /* transportSpecific | messageType */
>         u8 ver;  /* reserved0 | versionPTP */
> -- 
> Christian Eggers
> Embedded software developer
> 

^ permalink raw reply

* Re: [PATCH net-next] ip_gre: remove CRC flag from dev features in gre_gso_segment
From: Xin Long @ 2020-11-23  9:14 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: network dev, linux-sctp @ vger . kernel . org,
	Marcelo Ricardo Leitner, David Miller, Jakub Kicinski,
	Guillaume Nault, Paolo Abeni, Lorenzo Bianconi
In-Reply-To: <CAKgT0Ud5ft8VBvkaRDewa7qDwJDH8Z=LaaQqiGYVCsu2rgCh-Q@mail.gmail.com>

On Sat, Nov 21, 2020 at 12:10 AM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, Nov 20, 2020 at 2:23 AM Xin Long <lucien.xin@gmail.com> wrote:
> >
> > On Fri, Nov 20, 2020 at 1:24 AM Alexander Duyck
> > <alexander.duyck@gmail.com> wrote:
> > >
> > > On Wed, Nov 18, 2020 at 9:53 PM Xin Long <lucien.xin@gmail.com> wrote:
> > > >
> > > > On Thu, Nov 19, 2020 at 4:35 AM Alexander Duyck
> > > > <alexander.duyck@gmail.com> wrote:
> > > > >
> > > > > On Mon, Nov 16, 2020 at 1:17 AM Xin Long <lucien.xin@gmail.com> wrote:
> > > > > >
> > > > > > This patch is to let it always do CRC checksum in sctp_gso_segment()
> > > > > > by removing CRC flag from the dev features in gre_gso_segment() for
> > > > > > SCTP over GRE, just as it does in Commit 527beb8ef9c0 ("udp: support
> > > > > > sctp over udp in skb_udp_tunnel_segment") for SCTP over UDP.
> > > > > > It could set csum/csum_start in GSO CB properly in sctp_gso_segment()
> > > > > > after that commit, so it would do checksum with gso_make_checksum()
> > > > > > in gre_gso_segment(), and Commit 622e32b7d4a6 ("net: gre: recompute
> > > > > > gre csum for sctp over gre tunnels") can be reverted now.
> > > > > >
> > > > > > Signed-off-by: Xin Long <lucien.xin@gmail.com>
> > > > > > ---
> > > > > >  net/ipv4/gre_offload.c | 14 +++-----------
> > > > > >  1 file changed, 3 insertions(+), 11 deletions(-)
> > > > > >
> > > > > > diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
> > > > > > index e0a2465..a5935d4 100644
> > > > > > --- a/net/ipv4/gre_offload.c
> > > > > > +++ b/net/ipv4/gre_offload.c
> > > > > > @@ -15,12 +15,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> > > > > >                                        netdev_features_t features)
> > > > > >  {
> > > > > >         int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
> > > > > > -       bool need_csum, need_recompute_csum, gso_partial;
> > > > > >         struct sk_buff *segs = ERR_PTR(-EINVAL);
> > > > > >         u16 mac_offset = skb->mac_header;
> > > > > >         __be16 protocol = skb->protocol;
> > > > > >         u16 mac_len = skb->mac_len;
> > > > > >         int gre_offset, outer_hlen;
> > > > > > +       bool need_csum, gso_partial;
> > > > > >
> > > > > >         if (!skb->encapsulation)
> > > > > >                 goto out;
> > > > > > @@ -41,10 +41,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> > > > > >         skb->protocol = skb->inner_protocol;
> > > > > >
> > > > > >         need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
> > > > > > -       need_recompute_csum = skb->csum_not_inet;
> > > > > >         skb->encap_hdr_csum = need_csum;
> > > > > >
> > > > > >         features &= skb->dev->hw_enc_features;
> > > > > > +       features &= ~NETIF_F_SCTP_CRC;
> > > > > >
> > > > > >         /* segment inner packet. */
> > > > > >         segs = skb_mac_gso_segment(skb, features);
> > > > >
> > > > > Why just blindly strip NETIF_F_SCTP_CRC? It seems like it would make
> > > > > more sense if there was an explanation as to why you are stripping the
> > > > > offload. I know there are many NICs that could very easily perform
> > > > > SCTP CRC offload on the inner data as long as they didn't have to
> > > > > offload the outer data. For example the Intel NICs should be able to
> > > > > do it, although when I wrote the code up enabling their offloads I
> > > > > think it is only looking at the outer headers so that might require
> > > > > updating to get it to not use the software fallback.
> > > > >
> > > > > It really seems like we should only be clearing NETIF_F_SCTP_CRC if
> > > > > need_csum is true since we must compute the CRC before we can compute
> > > > > the GRE checksum.
> > > > Right, it's also what Jakub commented, thanks.
> > > >
> > > > >
> > > > > > @@ -99,15 +99,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> > > > > >                 }
> > > > > >
> > > > > >                 *(pcsum + 1) = 0;
> > > > > > -               if (need_recompute_csum && !skb_is_gso(skb)) {
> > > > > > -                       __wsum csum;
> > > > > > -
> > > > > > -                       csum = skb_checksum(skb, gre_offset,
> > > > > > -                                           skb->len - gre_offset, 0);
> > > > > > -                       *pcsum = csum_fold(csum);
> > > > > > -               } else {
> > > > > > -                       *pcsum = gso_make_checksum(skb, 0);
> > > > > > -               }
> > > > > > +               *pcsum = gso_make_checksum(skb, 0);
> > > > > >         } while ((skb = skb->next));
> > > > > >  out:
> > > > > >         return segs;
> > > > >
> > > > > This change doesn't make much sense to me. How are we expecting
> > > > > gso_make_checksum to be able to generate a valid checksum when we are
> > > > > dealing with a SCTP frame? From what I can tell it looks like it is
> > > > > just setting the checksum to ~0 and checksum start to the transport
> > > > > header which isn't true because SCTP is using a CRC, not a 1's
> > > > > complement checksum, or am I missing something? As such in order to
> > > > > get the gre checksum we would need to compute it over the entire
> > > > > payload data wouldn't we? Has this been tested with an actual GRE
> > > > > tunnel that had checksums enabled? If so was it verified that the GSO
> > > > > frames were actually being segmented at the NIC level and not at the
> > > > > GRE tunnel level?
> > > > Hi Alex,
> > > >
> > > > I think you're looking at net.git? As on net-next.git, sctp_gso_make_checksum()
> > > > has been fixed to set csum/csum_start properly by Commit 527beb8ef9c0 ("udp:
> > > > support sctp over udp in skb_udp_tunnel_segment"), so that we compute it over
> > > > the entire payload data, as you said above:
> > >
> > > No. I believe the code is still wrong. That is what I was pointing
> > > out. The GSO_CB->csum is supposed to be the checksum of the header
> > > from csum_start up to the end of the payload. In the case of the 1's
> > > complement checksum that is normally the inverse of the pseudo-header
> > > checksum. We don't normally compute it and instead assume it since it
> > > is offloaded. For a CRC based checksum you would need to compute the
> > > checksum over the entire packet since CRC and checksum are very
> > > different computations. That is why we were calling skb_checksum in
> > > the original code.
> > Hi, Alex, sorry for having confused you, see below.
> >
> > >
> > > > @@ -27,7 +27,11 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
> > > >  {
> > > >         skb->ip_summed = CHECKSUM_NONE;
> > > >         skb->csum_not_inet = 0;
> > > > -       gso_reset_checksum(skb, ~0);
> > > > +       /* csum and csum_start in GSO CB may be needed to do the UDP
> > > > +        * checksum when it's a UDP tunneling packet.
> > > > +        */
> > > > +       SKB_GSO_CB(skb)->csum = (__force __wsum)~0;
> > > > +       SKB_GSO_CB(skb)->csum_start = skb_headroom(skb) + skb->len;
> > > >         return sctp_compute_cksum(skb, skb_transport_offset(skb));
> > > >  }
> > > >
> > > > And yes, this patch has been tested with GRE tunnel checksums enabled.
> > > > (... icsum ocsum).
> > > > And yes, it was segmented in the lower NIC level, and we can make it by:
> > > >
> > > > # ethtool -K gre1 tx-sctp-segmentation on
> > > > # ethtool -K veth0 tx-sctp-segmentation off
> > > >
> > > > (Note: "tx-checksum-sctp" and "gso" are on for both devices)
> > > >
> > > > Thanks.
> > >
> > > I would also turn off Tx and Rx checksum offload on your veth device
> > > in order to make certain you aren't falsely sending data across
> > > indicating that the checksums are valid when they are not. It might be
> > > better if you were to run this over an actual NIC as that could then
> > > provide independent verification as it would be a fixed checksum test.
> > >
> > > I'm still not convinced this is working correctly. Basically a crc32c
> > > is not the same thing as a 1's complement checksum so you should need
> > > to compute both if you have an SCTP packet tunneled inside a UDP or
> > > GRE packet with a checksum. I don't see how computing a crc32c should
> > > automatically give you a 1's complement checksum of ~0.
> >
> > On the tx Path [1] below, the sctp crc checksum is calculated in
> > sctp_gso_make_checksum() [a], where it calls *sctp_compute_cksum()*
> > to do that, and as for the code in it:
> >
> >     SKB_GSO_CB(skb)->csum = (__force __wsum)~0;
> >     SKB_GSO_CB(skb)->csum_start = skb_headroom(skb) + skb->len;
>
> Okay, so I think I know how this is working, but the number of things
> relied on is ugly. Normally assuming skb_headroom(skb) + skb->len
> being valid for this would be a non-starter. I was assuming you
> weren't doing the 1's compliment checksum because you weren't using
> __skb_checksum to generate it.
>
> If I am not mistaken SCTP GSO uses the GSO_BY_FRAGS and apparently
> none of the frags are using page fragments within the skb. Am I
> understanding correctly? One thing that would help to address some of
> my concerns would be to clear instead of set NETIF_F_SG in
> sctp_gso_segment since your checksum depends on linear skbs.
Right, no frag is using page fragments for SCTP GSO.
NETIF_F_SG is set here, because in skb_segment():

                if (hsize > len || !sg)
                        hsize = len;

                if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
                    (skb_headlen(list_skb) == len || sg)) { <------
for flag_list

without sg set, it won't go to this 'if' block, which is the process
of frag_list, see

commit 89319d3801d1d3ac29c7df1f067038986f267d29
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Mon Dec 15 23:26:06 2008 -0800

    net: Add frag_list support to skb_segment

do you think this might be a bug in skb_segment()?

I was also thinking if SCTP GSO could go with the way of UDP's
with skb_segment_list() instead of GSO_BY_FRAGS things.
the different is that the head skb does not only include header,
but may also include userdata/payload with skb_segment_list().

>
> > is prepared for doing 1's complement checksum (for outer UDP/GRE), and used
> > in gre_gso_segment() [b], where it calls gso_make_checksum() to do that
> > when need_csum is set. Note that, here it played a TRICK:
> >
> > I set SKB_GSO_CB->csum_start to the end of this packet and
> > SKB_GSO_CB->csum = ~0 manually, so that in gso_make_checksum() it will do
> > the 1's complement checksum for outer UDP/GRE by summing all packet bits up.
> > See gso_make_checksum() (called by gre_gso_segment()):
> >
> >  unsigned char *csum_start = skb_transport_header(skb);
> >  int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start;
> >  /* now plen is from udp header to the END of packet. */
> >  __wsum partial = SKB_GSO_CB(skb)->csum;
> >
> >  return csum_fold(csum_partial(csum_start, plen, partial));
> >
> > So yes, here it does compute both if I have an SCTP packet tunnelled inside
> > a UDP or GRE packet with a checksum.
>
> Assuming you have the payload data in the skb->data section. Normally
> payload is in page frags. That is why I was concerned. You have to
> have guarantees in place that there will not be page fragments
> attached to the skb.
On SCTP TX path, sctp_packet_transmit() will always alloc linear skbs
and reserve headers for lower-layer protocols. I think this will guarantee it.

>
> > But you're right that "the GSO_CB->csum is supposed to be the checksum
> > of the header from csum_start up to the end of the payload". In this
> > TRICK, csum_start is set to the end of packet,  and csum should be
> > set to 0, and I will fix it in sctp_gso_make_checksum(), thanks!
> >
> > -       SKB_GSO_CB(skb)->csum = (__force __wsum)~0;
> > +       SKB_GSO_CB(skb)->csum = (__force __wsum)0;
> >
> > Does it make sense to you now?
>
> For a 1's compliment checksum ~0 and 0 are the same thing. So that
> value doesn't matter. The issue as I have pointed out is the fact that
> you are assuming a linear skb, and I am not certain that is what you
> will actually get out of the call to skb_segment that you make in
> sctp_gso_segment.
Thanks, didn't know ~0 and 0 are the same thing here.

>
> > Path [1]:
> >  sctp_gso_segment.cold.6+0x3c/0x87 [sctp] <----- [a]
> >  inet_gso_segment+0x152/0x3c0
> >  skb_mac_gso_segment+0xa2/0x110
> >  gre_gso_segment+0x138/0x380 <---- [b]
> >  inet_gso_segment+0x152/0x3c0
> >  skb_mac_gso_segment+0xa2/0x110
> >  __skb_gso_segment+0xba/0x160
> >  validate_xmit_skb+0x147/0x300
> >  __dev_queue_xmit+0x569/0x920
> >  ip_finish_output2+0x264/0x570
> >  ip_output+0x6d/0x100
> >  iptunnel_xmit+0x16e/0x200
> >  ip_tunnel_xmit+0x437/0x870 [ip_tunnel]
> >  ipgre_xmit+0x17b/0x210 [ip_gre]
> >  dev_hard_start_xmit+0xd4/0x200
> >  __dev_queue_xmit+0x78c/0x920
> >  ip_finish_output2+0x194/0x570
> >  ip_output+0x6d/0x100
> >  __ip_queue_xmit+0x15d/0x430
> >  sctp_packet_transmit+0x706/0x9b0 [sctp]
> >  sctp_outq_flush+0xd7/0x8d0 [sctp]
> >  sctp_cmd_interpreter.isra.21+0x721/0x1a20 [sctp]
> >  sctp_do_sm+0xc3/0x2a0 [sctp]
> >  sctp_primitive_SEND+0x2f/0x40 [sctp]
> >  sctp_sendmsg_to_asoc+0x5fa/0x870 [sctp]
> >  sctp_sendmsg+0x692/0x9d0 [sctp]
> >  sock_sendmsg+0x54/0x60

^ permalink raw reply

* [PATCH net-next v2 2/2] net/mlx5e: Add DSFP EEPROM dump support to ethtool
From: Moshe Shemesh @ 2020-11-23  9:19 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Andrew Lunn, Adrian Pop,
	Michal Kubecek
  Cc: netdev, Vladyslav Tarasiuk, Moshe Shemesh
In-Reply-To: <1606123198-6230-1-git-send-email-moshe@mellanox.com>

From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>

DSFP is a new cable module type, which EEPROM uses memory layout
described in CMIS 4.0 document. Use corresponding standard value for
userspace ethtool to distinguish DSFP's layout from older standards.

Add DSFP module ID in accordance to SFF-8024.

DSFP module memory can be flat or paged, which is indicated by a
flat_mem bit. In first case, only page 00 is available, and in second -
multiple pages: 00h, 01h, 02h, 10h and 11h. These five pages in bank
zero include the mandatory part for passive and active cables.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 12 ++++-
 .../net/ethernet/mellanox/mlx5/core/port.c    | 52 ++++++++++++++++---
 include/linux/mlx5/port.h                     |  1 +
 3 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 42e61dc28ead..e6e80f1b0e94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1659,8 +1659,8 @@ static int mlx5e_get_module_info(struct net_device *netdev,
 	int size_read = 0;
 	u8 data[4] = {0};
 
-	size_read = mlx5_query_module_eeprom(dev, 0, 2, data);
-	if (size_read < 2)
+	size_read = mlx5_query_module_eeprom(dev, 0, 3, data);
+	if (size_read < 3)
 		return -EIO;
 
 	/* data[0] = identifier byte */
@@ -1680,6 +1680,14 @@ static int mlx5e_get_module_info(struct net_device *netdev,
 			modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
 		}
 		break;
+	case MLX5_MODULE_ID_DSFP:
+		modinfo->type = ETH_MODULE_CMIS_4;
+		/* check flat_mem bit, zero indicates paged memory */
+		if (data[2] & 0x80)
+			modinfo->eeprom_len = ETH_MODULE_CMIS_4_LEN;
+		else
+			modinfo->eeprom_len = ETH_MODULE_CMIS_4_MAX_LEN;
+		break;
 	case MLX5_MODULE_ID_SFP:
 		modinfo->type       = ETH_MODULE_SFF_8472;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 4bb219565c58..df8e3d024479 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -311,13 +311,9 @@ static int mlx5_query_module_id(struct mlx5_core_dev *dev, int module_num,
 	return 0;
 }
 
-static int mlx5_qsfp_eeprom_page(u16 offset)
+static int mlx5_eeprom_high_page_num(u16 offset)
 {
-	if (offset < MLX5_EEPROM_PAGE_LENGTH)
-		/* Addresses between 0-255 - page 00 */
-		return 0;
-
-	/* Addresses between 256 - 639 belongs to pages 01, 02 and 03
+	/* Addresses 256 and higher belong to pages 01, 02, etc.
 	 * For example, offset = 400 belongs to page 02:
 	 * 1 + ((400 - 256)/128) = 2
 	 */
@@ -325,6 +321,16 @@ static int mlx5_qsfp_eeprom_page(u16 offset)
 		    MLX5_EEPROM_HIGH_PAGE_LENGTH);
 }
 
+static int mlx5_qsfp_eeprom_page(u16 offset)
+{
+	if (offset < MLX5_EEPROM_PAGE_LENGTH)
+		/* Addresses between 0-255 - page 00 */
+		return 0;
+
+	/* Addresses between 256 - 639 belong to pages 01, 02 and 03 */
+	return mlx5_eeprom_high_page_num(offset);
+}
+
 static int mlx5_qsfp_eeprom_high_page_offset(int page_num)
 {
 	if (!page_num) /* Page 0 always start from low page */
@@ -341,6 +347,37 @@ static void mlx5_qsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offse
 	*offset -=  mlx5_qsfp_eeprom_high_page_offset(*page_num);
 }
 
+static int mlx5_dsfp_eeprom_high_page_offset(int page_num)
+{
+	if (!page_num)
+		return 0;
+
+	return (page_num < 0x10 ? page_num : page_num - 13) * MLX5_EEPROM_HIGH_PAGE_LENGTH;
+}
+
+static int mlx5_dsfp_eeprom_page(u16 offset)
+{
+	if (offset < MLX5_EEPROM_PAGE_LENGTH)
+		return 0;
+
+	if (offset < MLX5_EEPROM_PAGE_LENGTH + (MLX5_EEPROM_HIGH_PAGE_LENGTH * 2))
+		/* Addresses 0 - 511 - pages 00, 01 and 02 */
+		return mlx5_eeprom_high_page_num(offset);
+
+	/* Offsets 512 - 767 belong to pages 10h and 11h.
+	 * For example, offset = 700 belongs to page 11:
+	 * 13 + 1 + ((700 - 256) / 128) = 17 = 0x11
+	 */
+	return 13 + mlx5_eeprom_high_page_num(offset);
+}
+
+static void mlx5_dsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset)
+{
+	*i2c_addr = MLX5_I2C_ADDR_LOW;
+	*page_num = mlx5_dsfp_eeprom_page(*offset);
+	*offset -= mlx5_dsfp_eeprom_high_page_offset(*page_num);
+}
+
 static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset)
 {
 	*i2c_addr = MLX5_I2C_ADDR_LOW;
@@ -380,6 +417,9 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 	case MLX5_MODULE_ID_QSFP28:
 		mlx5_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
 		break;
+	case MLX5_MODULE_ID_DSFP:
+		mlx5_dsfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		break;
 	default:
 		mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
 		return -EINVAL;
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 23edd2db4803..ad4b2e778d46 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -45,6 +45,7 @@ enum mlx5_module_id {
 	MLX5_MODULE_ID_QSFP             = 0xC,
 	MLX5_MODULE_ID_QSFP_PLUS        = 0xD,
 	MLX5_MODULE_ID_QSFP28           = 0x11,
+	MLX5_MODULE_ID_DSFP             = 0x1B,
 };
 
 enum mlx5_an_status {
-- 
2.18.2


^ permalink raw reply related

* [PATCH net-next v2 1/2] ethtool: Add CMIS 4.0 module type to UAPI
From: Moshe Shemesh @ 2020-11-23  9:19 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Andrew Lunn, Adrian Pop,
	Michal Kubecek
  Cc: netdev, Vladyslav Tarasiuk, Moshe Shemesh
In-Reply-To: <1606123198-6230-1-git-send-email-moshe@mellanox.com>

From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>

CMIS 4.0 document describes a universal EEPROM memory layout, which is
used for some modules such as DSFP, OSFP and QSFP-DD modules. In order
to distinguish them in userspace from existing standards, add
corresponding values.

CMIS 4.0 EERPOM memory includes mandatory and optional pages, the max
read length 768B includes passive and active cables mandatory pages.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
---
 include/uapi/linux/ethtool.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9ca87bc73c44..0ec4c0ea3235 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1861,9 +1861,12 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 #define ETH_MODULE_SFF_8636_LEN		256
 #define ETH_MODULE_SFF_8436		0x4
 #define ETH_MODULE_SFF_8436_LEN		256
+#define ETH_MODULE_CMIS_4		0x5
+#define ETH_MODULE_CMIS_4_LEN		256
 
 #define ETH_MODULE_SFF_8636_MAX_LEN     640
 #define ETH_MODULE_SFF_8436_MAX_LEN     640
+#define ETH_MODULE_CMIS_4_MAX_LEN	768
 
 /* Reset flags */
 /* The reset() operation must clear the flags for the components which
-- 
2.18.2


^ permalink raw reply related

* [PATCH net-next v2 0/2] Add support for DSFP transceiver type
From: Moshe Shemesh @ 2020-11-23  9:19 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, Andrew Lunn, Adrian Pop,
	Michal Kubecek
  Cc: netdev, Vladyslav Tarasiuk, Moshe Shemesh, Moshe Shemesh

Add support for new cable module type DSFP (Dual Small Form-Factor Pluggable
transceiver). DSFP EEPROM memory layout is compatible with CMIS 4.0 spec. Add
CMIS 4.0 module type to UAPI and implement DSFP EEPROM dump in mlx5.

Change log:
v1 -> v2
- Added comments on accessing only the mandatory part of passive and
  active cables.

Vladyslav Tarasiuk (2):
  ethtool: Add CMIS 4.0 module type to UAPI
  net/mlx5e: Add DSFP EEPROM dump support to ethtool

 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 12 ++++-
 .../net/ethernet/mellanox/mlx5/core/port.c    | 52 ++++++++++++++++---
 include/linux/mlx5/port.h                     |  1 +
 include/uapi/linux/ethtool.h                  |  3 ++
 4 files changed, 60 insertions(+), 8 deletions(-)

-- 
2.18.2


^ permalink raw reply

* RE: [PATCH v15 0/9] Enable ptp_kvm for arm/arm64
From: Jianyong Wu @ 2020-11-23  9:26 UTC (permalink / raw)
  To: Jianyong Wu, netdev@vger.kernel.org, yangbo.lu@nxp.com,
	john.stultz@linaro.org, tglx@linutronix.de, pbonzini@redhat.com,
	sean.j.christopherson@intel.com, maz@kernel.org,
	richardcochran@gmail.com, Mark Rutland, will@kernel.org,
	Suzuki Poulose, Andre Przywara, Steven Price
  Cc: linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org, Steve Capper,
	Justin He, nd
In-Reply-To: <20201111062211.33144-1-jianyong.wu@arm.com>

Hi,
Ping ...
Any comments?

> -----Original Message-----
> From: Jianyong Wu <jianyong.wu@arm.com>
> Sent: Wednesday, November 11, 2020 2:22 PM
> To: netdev@vger.kernel.org; yangbo.lu@nxp.com; john.stultz@linaro.org;
> tglx@linutronix.de; pbonzini@redhat.com; sean.j.christopherson@intel.com;
> maz@kernel.org; richardcochran@gmail.com; Mark Rutland
> <Mark.Rutland@arm.com>; will@kernel.org; Suzuki Poulose
> <Suzuki.Poulose@arm.com>; Andre Przywara <Andre.Przywara@arm.com>;
> Steven Price <Steven.Price@arm.com>
> Cc: linux-kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org; Steve Capper
> <Steve.Capper@arm.com>; Justin He <Justin.He@arm.com>; Jianyong Wu
> <Jianyong.Wu@arm.com>; nd <nd@arm.com>
> Subject: [PATCH v15 0/9] Enable ptp_kvm for arm/arm64
> 
> Currently, we offen use ntp (sync time with remote network clock) to sync
> time in VM. But the precision of ntp is subject to network delay so it's difficult
> to sync time in a high precision.
> 
> kvm virtual ptp clock (ptp_kvm) offers another way to sync time in VM, as
> the remote clock locates in the host instead of remote network clock.
> It targets to sync time between guest and host in virtualization environment
> and in this way, we can keep the time of all the VMs running in the same host
> in sync. In general, the delay of communication between host and guest is
> quiet small, so ptp_kvm can offer time sync precision up to in order of
> nanosecond. Please keep in mind that ptp_kvm just limits itself to be a
> channel which transmit the remote clock from host to guest and leaves the
> time sync jobs to an application, eg. chrony, in usersapce in VM.
> 
> How ptp_kvm works:
> After ptp_kvm initialized, there will be a new device node under /dev called
> ptp%d. A guest userspace service, like chrony, can use this device to get host
> walltime, sometimes also counter cycle, which depends on the service it calls.
> Then this guest userspace service can use those data to do the time sync for
> guest.
> here is a rough sketch to show how kvm ptp clock works.
> 
> |----------------------------|              |--------------------------|
> |       guest userspace      |              |          host            |
> |ioctl -> /dev/ptp%d         |              |                          |
> |       ^   |                |              |                          |
> |----------------------------|              |                          |
> |       |   | guest kernel   |              |                          |
> |       |   V      (get host walltime/counter cycle)                   |
> |      ptp_kvm -> hypercall - - - - - - - - - - ->hypercall service    |
> |                         <- - - - - - - - - - - -                     |
> |----------------------------|              |--------------------------|
> 
> 1. time sync service in guest userspace call ptp device through /dev/ptp%d.
> 2. ptp_kvm module in guest recive this request then invoke hypercall to
> route into host kernel to request host walltime/counter cycle.
> 3. ptp_kvm hypercall service in host response to the request and send data
> back.
> 4. ptp (not ptp_kvm) in guest copy the data to userspace.
> 
> This ptp_kvm implementation focuses itself to step 2 and 3 and step 2 works
> in guest comparing step 3 works in host kernel.
> 
> change log:
> 
> from v14 to v15
>         (1) enable ptp_kvm on arm32 guest, also ptp_kvm has been tested on
> both arm64 and arm32 guest running on arm64 kvm host.
>         (2) move arch-agnostic part of ptp_kvm.rst into timekeeping.rst.
>         (3) rename KVM_CAP_ARM_PTP_KVM to KVM_CAP_PTP_KVM as it
> should be arch agnostic.
>         (4) add description for KVM_CAP_PTP_KVM in
> Documentation/virt/kvm/api.rst.
>         (5) adjust dependency in Kconfig for ptp_kvm.
>         (6) refine multi-arch process in driver/ptp/Makefile.
>         (7) fix make pdfdocs htmldocs issue for ptp_kvm doc.
>         (8) address other issues from comments in v14.
>         (9) fold hypercall service of ptp_kvm as a function.
>         (10) rebase to 5.10-rc3.
> 
> from v13 to v14
>         (1) rebase code on 5.9-rc3.
>         (2) add a document to introduce implementation of PTP_KVM on arm64.
>         (3) fix comments issue in hypercall.c.
>         (4) export arm_smccc_1_1_get_conduit using EXPORT_SYMBOL_GPL.
>         (5) fix make issue on x86 reported by kernel test robot.
> 
> from v12 to v13:
>         (1) rebase code on 5.8-rc1.
>         (2) this patch set base on 2 patches of 1/8 and 2/8 from Will Decon.
>         (3) remove the change to ptp device code of extend getcrosststamp.
>         (4) remove the mechanism of letting user choose the counter type in
> ptp_kvm for arm64.
>         (5) add virtual counter option in ptp_kvm service to let user choose the
> specific counter explicitly.
> 
> from v11 to v12:
>         (1) rebase code on 5.7-rc6 and rebase 2 patches from Will Decon
> including 1/11 and 2/11. as these patches introduce discover mechanism of
> vendor smccc service.
>         (2) rebase ptp_kvm hypercall service from standard smccc to vendor
> smccc and add ptp_kvm to vendor smccc service discover mechanism.
>         (3) add detail of why we need ptp_kvm and how ptp_kvm works in cover
> letter.
> 
> from v10 to v11:
>         (1) rebase code on 5.7-rc2.
>         (2) remove support for arm32, as kvm support for arm32 will be removed
> [1]
>         (3) add error report in ptp_kvm initialization.
> 
> from v9 to v10:
>         (1) change code base to v5.5.
>         (2) enable ptp_kvm both for arm32 and arm64.
>         (3) let user choose which of virtual counter or physical counter should
> return when using crosstimestamp mode of ptp_kvm for arm/arm64.
>         (4) extend input argument for getcrosstimestamp API.
> 
> from v8 to v9:
>         (1) move ptp_kvm.h to driver/ptp/
>         (2) replace license declaration of ptp_kvm.h the same with other header
> files in the same directory.
> 
> from v7 to v8:
>         (1) separate adding clocksource id for arm_arch_counter as a single patch.
>         (2) update commit message for patch 4/8.
>         (3) refine patch 7/8 and patch 8/8 to make them more independent.
> 
> from v5 to v6:
>         (1) apply Mark's patch[4] to get SMCCC conduit.
>         (2) add mechanism to recognize current clocksource by add
> clocksouce_id value into struct clocksource instead of method in patch-v5.
>         (3) rename kvm_arch_ptp_get_clock_fn into
> kvm_arch_ptp_get_crosststamp.
> 
> from v4 to v5:
>         (1) remove hvc delay compensasion as it should leave to userspace.
>         (2) check current clocksource in hvc call service.
>         (3) expose current clocksource by adding it to system_time_snapshot.
>         (4) add helper to check if clocksource is arm_arch_counter.
>         (5) rename kvm_ptp.c to ptp_kvm_common.c
> 
> from v3 to v4:
>         (1) fix clocksource of ptp_kvm to arch_sys_counter.
>         (2) move kvm_arch_ptp_get_clock_fn into arm_arch_timer.c
>         (3) subtract cntvoff before return cycles from host.
>         (4) use ktime_get_snapshot instead of getnstimeofday and
> get_current_counterval to return time and counter value.
>         (5) split ktime and counter into two 32-bit block respectively to avoid
> Y2038-safe issue.
>         (6) set time compensation to device time as half of the delay of hvc call.
>         (7) add ARM_ARCH_TIMER as dependency of ptp_kvm for arm64.
> 
> from v2 to v3:
>         (1) fix some issues in commit log.
>         (2) add some receivers in send list.
> 
> from v1 to v2:
>         (1) move arch-specific code from arch/ to driver/ptp/
>         (2) offer mechanism to inform userspace if ptp_kvm service is available.
>         (3) separate ptp_kvm code for arm64 into hypervisor part and guest part.
>         (4) add API to expose monotonic clock and counter value.
>         (5) refine code: remove no necessary part and reconsitution.
> 
> [1] https://patchwork.kernel.org/cover/11373351/
> 
> 
> Jianyong Wu (6):
>   ptp: Reorganize ptp_kvm module to make it arch-independent.
>   clocksource: Add clocksource id for arm arch counter
>   arm64/kvm: Add hypercall service for kvm ptp.
>   ptp: arm/arm64: Enable ptp_kvm for arm/arm64
>   doc: add ptp_kvm introduction for arm64 support
>   arm64: Add kvm capability check extension for ptp_kvm
> 
> Thomas Gleixner (1):
>   time: Add mechanism to recognize clocksource in time_get_snapshot
> 
> Will Deacon (2):
>   arm64: Probe for the presence of KVM hypervisor
>   arm/arm64: KVM: Advertise KVM UID to guests via SMCCC
> 
>  Documentation/virt/kvm/api.rst              |  9 ++
>  Documentation/virt/kvm/arm/index.rst        |  1 +
>  Documentation/virt/kvm/arm/ptp_kvm.rst      | 29 +++++++
>  Documentation/virt/kvm/timekeeping.rst      | 35 ++++++++
>  arch/arm/kernel/setup.c                     |  1 +
>  arch/arm64/kernel/setup.c                   |  1 +
>  arch/arm64/kvm/arm.c                        |  1 +
>  arch/arm64/kvm/hypercalls.c                 | 88 +++++++++++++++++--
>  drivers/clocksource/arm_arch_timer.c        | 30 +++++++
>  drivers/firmware/smccc/smccc.c              | 37 ++++++++
>  drivers/ptp/Kconfig                         |  2 +-
>  drivers/ptp/Makefile                        |  2 +
>  drivers/ptp/ptp_kvm.h                       | 11 +++
>  drivers/ptp/ptp_kvm_arm.c                   | 44 ++++++++++
>  drivers/ptp/{ptp_kvm.c => ptp_kvm_common.c} | 85 +++++-------------
>  drivers/ptp/ptp_kvm_x86.c                   | 95 +++++++++++++++++++++
>  include/linux/arm-smccc.h                   | 60 +++++++++++++
>  include/linux/clocksource.h                 |  6 ++
>  include/linux/clocksource_ids.h             | 12 +++
>  include/linux/timekeeping.h                 | 12 +--
>  include/uapi/linux/kvm.h                    |  1 +
>  kernel/time/clocksource.c                   |  2 +
>  kernel/time/timekeeping.c                   |  1 +
>  23 files changed, 488 insertions(+), 77 deletions(-)  create mode 100644
> Documentation/virt/kvm/arm/ptp_kvm.rst
>  create mode 100644 drivers/ptp/ptp_kvm.h  create mode 100644
> drivers/ptp/ptp_kvm_arm.c  rename drivers/ptp/{ptp_kvm.c =>
> ptp_kvm_common.c} (60%)  create mode 100644
> drivers/ptp/ptp_kvm_x86.c  create mode 100644
> include/linux/clocksource_ids.h
> 
> --
> 2.17.1


^ permalink raw reply

* Re: [PATCH net-next v4 2/5] net/lapb: support netdev events
From: Xie He @ 2020-11-23  9:36 UTC (permalink / raw)
  To: Martin Schiller
  Cc: Andrew Hendry, David S. Miller, Jakub Kicinski, Linux X25,
	Linux Kernel Network Developers, LKML
In-Reply-To: <d85a4543eae46bac1de28ec17a2389dd@dev.tdt.de>

On Mon, Nov 23, 2020 at 1:00 AM Martin Schiller <ms@dev.tdt.de> wrote:
>
> AFAIK the carrier can't be up before the device is up. Therefore, there
> will be a NETDEV_CHANGE event after the NETDEV_UP event.
>
> This is what I can see in my tests (with the HDLC interface).
>
> Is the behaviour different for e.g. lapbether?

Some drivers don't support carrier status and will never change it.
Their carrier status will always be UP. There will not be a
NETDEV_CHANGE event.

lapbether doesn't change carrier status. I also have my own virtual
HDLC WAN driver (for testing) which also doesn't change carrier
status.

I just tested with lapbether. When I bring up the interface, there
will only be NETDEV_PRE_UP and then NETDEV_UP. There will not be
NETDEV_CHANGE. The carrier status is alway UP.

I haven't tested whether a device can receive NETDEV_CHANGE when it is
down. It's possible for a device driver to call netif_carrier_on when
the interface is down. Do you know what will happen if a device driver
calls netif_carrier_on when the interface is down?

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox