Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v3 13/16] selftests: forwarding: devlink_lib: Allow tests to define devlink device
From: Ido Schimmel @ 2019-08-17 13:28 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190817132825.29790-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

For tests that create their network interfaces dynamically or do not use
interfaces at all (as with netdevsim) it is useful to define their own
devlink device instead of deriving it from the first network interface.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 .../selftests/net/forwarding/devlink_lib.sh   | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 8553a67a2322..2b9296f6aa07 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -4,19 +4,21 @@
 ##############################################################################
 # Defines
 
-DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
-		     | jq -r '.port | keys[]' | cut -d/ -f-2)
-if [ -z "$DEVLINK_DEV" ]; then
-	echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
-	exit 1
-fi
-if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
-	echo "SKIP: devlink device's bus is not PCI"
-	exit 1
-fi
+if [[ ! -v DEVLINK_DEV ]]; then
+	DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+			     | jq -r '.port | keys[]' | cut -d/ -f-2)
+	if [ -z "$DEVLINK_DEV" ]; then
+		echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+		exit 1
+	fi
+	if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+		echo "SKIP: devlink device's bus is not PCI"
+		exit 1
+	fi
 
-DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
-		 -n | cut -d" " -f3)
+	DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+			 -n | cut -d" " -f3)
+fi
 
 ##############################################################################
 # Sanity checks
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v3 14/16] selftests: forwarding: devlink_lib: Add devlink-trap helpers
From: Ido Schimmel @ 2019-08-17 13:28 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190817132825.29790-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Add helpers to interact with devlink-trap, such as setting the action of
a trap and retrieving statistics.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 .../selftests/net/forwarding/devlink_lib.sh   | 163 ++++++++++++++++++
 1 file changed, 163 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 2b9296f6aa07..13d03a6d85ba 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -29,6 +29,12 @@ if [ $? -ne 0 ]; then
 	exit 1
 fi
 
+devlink help 2>&1 | grep trap &> /dev/null
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing devlink trap support"
+	exit 1
+fi
+
 ##############################################################################
 # Devlink helpers
 
@@ -192,3 +198,160 @@ devlink_tc_bind_pool_th_restore()
 	devlink sb tc bind set $port tc $tc type $dir \
 		pool ${orig[0]} th ${orig[1]}
 }
+
+devlink_traps_num_get()
+{
+	devlink -j trap | jq '.[]["'$DEVLINK_DEV'"] | length'
+}
+
+devlink_traps_get()
+{
+	devlink -j trap | jq -r '.[]["'$DEVLINK_DEV'"][].name'
+}
+
+devlink_trap_type_get()
+{
+	local trap_name=$1; shift
+
+	devlink -j trap show $DEVLINK_DEV trap $trap_name \
+		| jq -r '.[][][].type'
+}
+
+devlink_trap_action_set()
+{
+	local trap_name=$1; shift
+	local action=$1; shift
+
+	# Pipe output to /dev/null to avoid expected warnings.
+	devlink trap set $DEVLINK_DEV trap $trap_name \
+		action $action &> /dev/null
+}
+
+devlink_trap_action_get()
+{
+	local trap_name=$1; shift
+
+	devlink -j trap show $DEVLINK_DEV trap $trap_name \
+		| jq -r '.[][][].action'
+}
+
+devlink_trap_group_get()
+{
+	devlink -j trap show $DEVLINK_DEV trap $trap_name \
+		| jq -r '.[][][].group'
+}
+
+devlink_trap_metadata_test()
+{
+	local trap_name=$1; shift
+	local metadata=$1; shift
+
+	devlink -jv trap show $DEVLINK_DEV trap $trap_name \
+		| jq -e '.[][][].metadata | contains(["'$metadata'"])' \
+		&> /dev/null
+}
+
+devlink_trap_rx_packets_get()
+{
+	local trap_name=$1; shift
+
+	devlink -js trap show $DEVLINK_DEV trap $trap_name \
+		| jq '.[][][]["stats"]["rx"]["packets"]'
+}
+
+devlink_trap_rx_bytes_get()
+{
+	local trap_name=$1; shift
+
+	devlink -js trap show $DEVLINK_DEV trap $trap_name \
+		| jq '.[][][]["stats"]["rx"]["bytes"]'
+}
+
+devlink_trap_stats_idle_test()
+{
+	local trap_name=$1; shift
+	local t0_packets t0_bytes
+	local t1_packets t1_bytes
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	sleep 1
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+		return 0
+	else
+		return 1
+	fi
+}
+
+devlink_traps_enable_all()
+{
+	local trap_name
+
+	for trap_name in $(devlink_traps_get); do
+		devlink_trap_action_set $trap_name "trap"
+	done
+}
+
+devlink_traps_disable_all()
+{
+	for trap_name in $(devlink_traps_get); do
+		devlink_trap_action_set $trap_name "drop"
+	done
+}
+
+devlink_trap_groups_get()
+{
+	devlink -j trap group | jq -r '.[]["'$DEVLINK_DEV'"][].name'
+}
+
+devlink_trap_group_action_set()
+{
+	local group_name=$1; shift
+	local action=$1; shift
+
+	# Pipe output to /dev/null to avoid expected warnings.
+	devlink trap group set $DEVLINK_DEV group $group_name action $action \
+		&> /dev/null
+}
+
+devlink_trap_group_rx_packets_get()
+{
+	local group_name=$1; shift
+
+	devlink -js trap group show $DEVLINK_DEV group $group_name \
+		| jq '.[][][]["stats"]["rx"]["packets"]'
+}
+
+devlink_trap_group_rx_bytes_get()
+{
+	local group_name=$1; shift
+
+	devlink -js trap group show $DEVLINK_DEV group $group_name \
+		| jq '.[][][]["stats"]["rx"]["bytes"]'
+}
+
+devlink_trap_group_stats_idle_test()
+{
+	local group_name=$1; shift
+	local t0_packets t0_bytes
+	local t1_packets t1_bytes
+
+	t0_packets=$(devlink_trap_group_rx_packets_get $group_name)
+	t0_bytes=$(devlink_trap_group_rx_bytes_get $group_name)
+
+	sleep 1
+
+	t1_packets=$(devlink_trap_group_rx_packets_get $group_name)
+	t1_bytes=$(devlink_trap_group_rx_bytes_get $group_name)
+
+	if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+		return 0
+	else
+		return 1
+	fi
+}
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v3 15/16] selftests: devlink_trap: Add test cases for devlink-trap
From: Ido Schimmel @ 2019-08-17 13:28 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190817132825.29790-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Add test cases for devlink-trap on top of the netdevsim implementation.

The tests focus on the devlink-trap core infrastructure and user space
API. They test both good and bad flows, as well as the dismantling of the
netdev and devlink device used to report trapped packets.

This allows device drivers to focus their tests on device-specific
functionality.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 .../drivers/net/netdevsim/devlink_trap.sh     | 364 ++++++++++++++++++
 1 file changed, 364 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
new file mode 100755
index 000000000000..f101ab9441e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking devlink-trap functionality. It makes use of
+# netdevsim which implements the required callbacks.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	init_test
+	trap_action_test
+	trap_metadata_test
+	bad_trap_test
+	bad_trap_action_test
+	trap_stats_test
+	trap_group_action_test
+	bad_trap_group_test
+	trap_group_stats_test
+	port_del_test
+	dev_del_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SLEEP_TIME=1
+NETDEV=""
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+require_command udevadm
+
+modprobe netdevsim &> /dev/null
+if [ ! -d "$NETDEVSIM_PATH" ]; then
+	echo "SKIP: No netdevsim support"
+	exit 1
+fi
+
+if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
+	echo "SKIP: Device netdevsim${DEV_ADDR} already exists"
+	exit 1
+fi
+
+init_test()
+{
+	RET=0
+
+	test $(devlink_traps_num_get) -ne 0
+	check_err $? "No traps were registered"
+
+	log_test "Initialization"
+}
+
+trap_action_test()
+{
+	local orig_action
+	local trap_name
+	local action
+
+	RET=0
+
+	for trap_name in $(devlink_traps_get); do
+		# The action of non-drop traps cannot be changed.
+		if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+			devlink_trap_action_set $trap_name "trap"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "trap" ]; then
+				check_err 1 "Trap $trap_name did not change action to trap"
+			fi
+
+			devlink_trap_action_set $trap_name "drop"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "drop" ]; then
+				check_err 1 "Trap $trap_name did not change action to drop"
+			fi
+		else
+			orig_action=$(devlink_trap_action_get $trap_name)
+
+			devlink_trap_action_set $trap_name "trap"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != $orig_action ]; then
+				check_err 1 "Trap $trap_name changed action when should not"
+			fi
+
+			devlink_trap_action_set $trap_name "drop"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != $orig_action ]; then
+				check_err 1 "Trap $trap_name changed action when should not"
+			fi
+		fi
+	done
+
+	log_test "Trap action"
+}
+
+trap_metadata_test()
+{
+	local trap_name
+
+	RET=0
+
+	for trap_name in $(devlink_traps_get); do
+		devlink_trap_metadata_test $trap_name "input_port"
+		check_err $? "Input port not reported as metadata of trap $trap_name"
+	done
+
+	log_test "Trap metadata"
+}
+
+bad_trap_test()
+{
+	RET=0
+
+	devlink_trap_action_set "made_up_trap" "drop"
+	check_fail $? "Did not get an error for non-existing trap"
+
+	log_test "Non-existing trap"
+}
+
+bad_trap_action_test()
+{
+	local traps_arr
+	local trap_name
+
+	RET=0
+
+	# Pick first trap.
+	traps_arr=($(devlink_traps_get))
+	trap_name=${traps_arr[0]}
+
+	devlink_trap_action_set $trap_name "made_up_action"
+	check_fail $? "Did not get an error for non-existing trap action"
+
+	log_test "Non-existing trap action"
+}
+
+trap_stats_test()
+{
+	local trap_name
+
+	RET=0
+
+	for trap_name in $(devlink_traps_get); do
+		devlink_trap_stats_idle_test $trap_name
+		check_err $? "Stats of trap $trap_name not idle when netdev down"
+
+		ip link set dev $NETDEV up
+
+		if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+			devlink_trap_action_set $trap_name "trap"
+			devlink_trap_stats_idle_test $trap_name
+			check_fail $? "Stats of trap $trap_name idle when action is trap"
+
+			devlink_trap_action_set $trap_name "drop"
+			devlink_trap_stats_idle_test $trap_name
+			check_err $? "Stats of trap $trap_name not idle when action is drop"
+		else
+			devlink_trap_stats_idle_test $trap_name
+			check_fail $? "Stats of non-drop trap $trap_name idle when should not"
+		fi
+
+		ip link set dev $NETDEV down
+	done
+
+	log_test "Trap statistics"
+}
+
+trap_group_action_test()
+{
+	local curr_group group_name
+	local trap_name
+	local trap_type
+	local action
+
+	RET=0
+
+	for group_name in $(devlink_trap_groups_get); do
+		devlink_trap_group_action_set $group_name "trap"
+
+		for trap_name in $(devlink_traps_get); do
+			curr_group=$(devlink_trap_group_get $trap_name)
+			if [ $curr_group != $group_name ]; then
+				continue
+			fi
+
+			trap_type=$(devlink_trap_type_get $trap_name)
+			if [ $trap_type != "drop" ]; then
+				continue
+			fi
+
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "trap" ]; then
+				check_err 1 "Trap $trap_name did not change action to trap"
+			fi
+		done
+
+		devlink_trap_group_action_set $group_name "drop"
+
+		for trap_name in $(devlink_traps_get); do
+			curr_group=$(devlink_trap_group_get $trap_name)
+			if [ $curr_group != $group_name ]; then
+				continue
+			fi
+
+			trap_type=$(devlink_trap_type_get $trap_name)
+			if [ $trap_type != "drop" ]; then
+				continue
+			fi
+
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "drop" ]; then
+				check_err 1 "Trap $trap_name did not change action to drop"
+			fi
+		done
+	done
+
+	log_test "Trap group action"
+}
+
+bad_trap_group_test()
+{
+	RET=0
+
+	devlink_trap_group_action_set "made_up_trap_group" "drop"
+	check_fail $? "Did not get an error for non-existing trap group"
+
+	log_test "Non-existing trap group"
+}
+
+trap_group_stats_test()
+{
+	local group_name
+
+	RET=0
+
+	for group_name in $(devlink_trap_groups_get); do
+		devlink_trap_group_stats_idle_test $group_name
+		check_err $? "Stats of trap group $group_name not idle when netdev down"
+
+		ip link set dev $NETDEV up
+
+		devlink_trap_group_action_set $group_name "trap"
+		devlink_trap_group_stats_idle_test $group_name
+		check_fail $? "Stats of trap group $group_name idle when action is trap"
+
+		devlink_trap_group_action_set $group_name "drop"
+		ip link set dev $NETDEV down
+	done
+
+	log_test "Trap group statistics"
+}
+
+port_del_test()
+{
+	local group_name
+	local i
+
+	# The test never fails. It is meant to exercise different code paths
+	# and make sure we properly dismantle a port while packets are
+	# in-flight.
+	RET=0
+
+	devlink_traps_enable_all
+
+	for i in $(seq 1 10); do
+		ip link set dev $NETDEV up
+
+		sleep $SLEEP_TIME
+
+		netdevsim_port_destroy
+		netdevsim_port_create
+		udevadm settle
+	done
+
+	devlink_traps_disable_all
+
+	log_test "Port delete"
+}
+
+dev_del_test()
+{
+	local group_name
+	local i
+
+	# The test never fails. It is meant to exercise different code paths
+	# and make sure we properly unregister traps while packets are
+	# in-flight.
+	RET=0
+
+	devlink_traps_enable_all
+
+	for i in $(seq 1 10); do
+		ip link set dev $NETDEV up
+
+		sleep $SLEEP_TIME
+
+		cleanup
+		setup_prepare
+	done
+
+	devlink_traps_disable_all
+
+	log_test "Device delete"
+}
+
+netdevsim_dev_create()
+{
+	echo "$DEV_ADDR 0" > ${NETDEVSIM_PATH}/new_device
+}
+
+netdevsim_dev_destroy()
+{
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+}
+
+netdevsim_port_create()
+{
+	echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/new_port
+}
+
+netdevsim_port_destroy()
+{
+	echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/del_port
+}
+
+setup_prepare()
+{
+	local netdev
+
+	netdevsim_dev_create
+
+	if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}" ]; then
+		echo "Failed to create netdevsim device"
+		exit 1
+	fi
+
+	netdevsim_port_create
+
+	if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}/net/" ]; then
+		echo "Failed to create netdevsim port"
+		exit 1
+	fi
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	NETDEV=$(ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+}
+
+cleanup()
+{
+	pre_cleanup
+	netdevsim_port_destroy
+	netdevsim_dev_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v3 16/16] Documentation: Add a section for devlink-trap testing
From: Ido Schimmel @ 2019-08-17 13:28 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190817132825.29790-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 Documentation/networking/devlink-trap.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/networking/devlink-trap.rst b/Documentation/networking/devlink-trap.rst
index fe4f6e149623..c20c7c483664 100644
--- a/Documentation/networking/devlink-trap.rst
+++ b/Documentation/networking/devlink-trap.rst
@@ -196,3 +196,13 @@ narrow. The description of these groups must be added to the following table:
    * - ``buffer_drops``
      - Contains packet traps for packets that were dropped by the device due to
        an enqueue decision
+
+Testing
+=======
+
+See ``tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh`` for a
+test covering the core infrastructure. Test cases should be added for any new
+functionality.
+
+Device drivers should focus their tests on device-specific functionality, such
+as the triggering of supported packet traps.
-- 
2.21.0


^ permalink raw reply related

* Re: [RFC PATCH bpf-next 00/14] xdp_flow: Flow offload to XDP
From: Toshiaki Makita @ 2019-08-17 14:01 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Stanislav Fomichev, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, David S. Miller,
	Jesper Dangaard Brouer, John Fastabend, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, bpf, William Tu
In-Reply-To: <20190816115224.6aafd4ee@cakuba.netronome.com>

On 19/08/17 (土) 3:52:24, Jakub Kicinski wrote:
> On Fri, 16 Aug 2019 10:28:10 +0900, Toshiaki Makita wrote:
>> On 2019/08/16 4:22, Jakub Kicinski wrote:
>>> There's a certain allure in bringing the in-kernel BPF translation
>>> infrastructure forward. OTOH from system architecture perspective IMHO
>>> it does seem like a task best handed in user space. bpfilter can replace
>>> iptables completely, here we're looking at an acceleration relatively
>>> loosely coupled with flower.
>>
>> I don't think it's loosely coupled. Emulating TC behavior in userspace
>> is not so easy.
>>
>> Think about recent multi-mask support in flower. Previously userspace could
>> assume there is one mask and hash table for each preference in TC. After the
>> change TC accepts different masks with the same pref. Such a change tends to
>> break userspace emulation. It may ignore masks passed from flow insertion
>> and use the mask remembered when the first flow of the pref is inserted. It
>> may override the mask of all existing flows with the pref. It may fail to
>> insert such flows. Any of them would result in unexpected wrong datapath
>> handling which is critical.
>> I think such an emulation layer needs to be updated in sync with TC.
> 
> Oh, so you're saying that if xdp_flow is merged all patches to
> cls_flower and netfilter which affect flow offload will be required
> to update xdp_flow as well?

Hmm... you are saying that we are allowed to break another in-kernel
subsystem by some change? Sounds strange...

> That's a question of policy. Technically the implementation in user
> space is equivalent.
 >
> The advantage of user space implementation is that you can add more
> to it and explore use cases which do not fit in the flow offload API,
> but are trivial for BPF. Not to mention the obvious advantage of
> decoupling the upgrade path.

I understand the advantage, but I can't trust such a third-party kernel
emulation solution for this kind of thing, which handles a critical data path.

> Personally I'm not happy with the way this patch set messes with the
> flow infrastructure. You should use the indirect callback
> infrastructure instead, and that way you can build the whole thing
> touching none of the flow offload core.

I don't want to mess up the core flow infrastructure either. I'm all 
ears about less invasive ways. Using indirect callback sounds like a 
good idea. Will give it a try. Many thanks.

Toshiaki Makita

^ permalink raw reply

* Re: [PATCH v6 4/4] net: phy: realtek: Add LED configuration support for RTL8211E
From: Pavel Machek @ 2019-08-17 14:05 UTC (permalink / raw)
  To: Matthias Kaehlcke, jacek.anaszewski, linux-leds, dmurphy
  Cc: David S . Miller, Rob Herring, Mark Rutland, Andrew Lunn,
	Florian Fainelli, Heiner Kallweit, netdev, devicetree,
	linux-kernel, Douglas Anderson
In-Reply-To: <20190816212728.GW250418@google.com>

[-- Attachment #1: Type: text/plain, Size: 1409 bytes --]

On Fri 2019-08-16 14:27:28, Matthias Kaehlcke wrote:
> On Fri, Aug 16, 2019 at 10:13:42PM +0200, Pavel Machek wrote:
> > On Tue 2019-08-13 12:11:47, Matthias Kaehlcke wrote:
> > > Add a .config_led hook which is called by the PHY core when
> > > configuration data for a PHY LED is available. Each LED can be
> > > configured to be solid 'off, solid 'on' for certain (or all)
> > > link speeds or to blink on RX/TX activity.
> > > 
> > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > 
> > This really needs to go through the LED subsystem,
> 
> Sorry, I used what get_maintainers.pl threw at me, I should have
> manually cc-ed the LED list.
> 
> > and use the same userland interfaces as the rest of the system.
> 
> With the PHY maintainers we discussed to define a binding that is
> compatible with that of the LED one, to have the option to integrate
> it with the LED subsystem later. The integration itself is beyond the
> scope of this patchset.

Yes, I believe the integration is necessary. Using the same binding is
necessary for that, but not sufficient. For example, we need
compatible trigger names, too.

So... I'd really like to see that proper integration is possible before we
merge this.

Best regards,

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* Re: [RFC PATCH bpf-next 00/14] xdp_flow: Flow offload to XDP
From: Toshiaki Makita @ 2019-08-17 14:10 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, David S. Miller, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, bpf, William Tu
In-Reply-To: <20190816153550.GO2820@mini-arch>

On 19/08/17 (土) 0:35:50, Stanislav Fomichev wrote:
> On 08/16, Toshiaki Makita wrote:
>> On 2019/08/16 0:21, Stanislav Fomichev wrote:
>>> On 08/15, Toshiaki Makita wrote:
>>>> On 2019/08/15 2:07, Stanislav Fomichev wrote:
>>>>> On 08/13, Toshiaki Makita wrote:
>>>>>> * Implementation
>>>>>>
>>>>>> xdp_flow makes use of UMH to load an eBPF program for XDP, similar to
>>>>>> bpfilter. The difference is that xdp_flow does not generate the eBPF
>>>>>> program dynamically but a prebuilt program is embedded in UMH. This is
>>>>>> mainly because flow insertion is considerably frequent. If we generate
>>>>>> and load an eBPF program on each insertion of a flow, the latency of the
>>>>>> first packet of ping in above test will increase, which I want to avoid.
>>>>> Can this be instead implemented with a new hook that will be called
>>>>> for TC events? This hook can write to perf event buffer and control
>>>>> plane will insert/remove/modify flow tables in the BPF maps (control
>>>>> plane will also install xdp program).
>>>>>
>>>>> Why do we need UMH? What am I missing?
>>>>
>>>> So you suggest doing everything in xdp_flow kmod?
>>> You probably don't even need xdp_flow kmod. Add new tc "offload" mode
>>> (bypass) that dumps every command via netlink (or calls the BPF hook
>>> where you can dump it into perf event buffer) and then read that info
>>> from userspace and install xdp programs and modify flow tables.
>>> I don't think you need any kernel changes besides that stream
>>> of data from the kernel about qdisc/tc flow creation/removal/etc.
>>
>> My intention is to make more people who want high speed network easily use XDP,
>> so making transparent XDP offload with current TC interface.
>>
>> What userspace program would monitor TC events with your suggestion?
> Have a new system daemon (xdpflowerd) that is independently
> packaged/shipped/installed. Anybody who wants accelerated TC can
> download/install it. OVS can be completely unaware of this.

Thanks, but that's what I called an unreliable solution...

>> ovs-vswitchd? If so, it even does not need to monitor TC. It can
>> implement XDP offload directly.
>> (However I prefer kernel solution. Please refer to "About alternative
>> userland (ovs-vswitchd etc.) implementation" section in the cover letter.)
>>
>> Also such a TC monitoring solution easily can be out-of-sync with real TC
>> behavior as TC filter/flower is being heavily developed and changed,
>> e.g. introduction of TC block, support multiple masks with the same pref, etc.
>> I'm not sure such an unreliable solution have much value.
> This same issue applies to the in-kernel implementation, isn't it?
> What happens if somebody sends patches for a new flower feature but
> doesn't add appropriate xdp support? Do we reject them?

Why would we accept a patch which breaks another in-kernel subsystem?
Such patches can be applied accidentally, but we are supposed to fix such
problems in the -rc phase, aren't we?

Toshiaki Makita

^ permalink raw reply

* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: Jason Baron @ 2019-08-17 14:19 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller
  Cc: netdev, Soheil Hassas Yeganeh, Neal Cardwell, Eric Dumazet,
	Vladimir Rutsky
In-Reply-To: <20190817042622.91497-1-edumazet@google.com>



On 8/17/19 12:26 AM, Eric Dumazet wrote:
> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
> when needed.
> 
> However, Jason patch had a bug, because the 'nonblocking' status
> as far as sk_stream_wait_memory() is concerned is governed
> by MSG_DONTWAIT flag passed at sendmsg() time :
> 
>     long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
> 
> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
> value.

Is MSG_DONTWAIT not set in this case? The original patch was intended
only for the explicit non-blocking case. The epoll manpage says:
"EPOLLET flag should use nonblocking file descriptors". So the original
intention was not to impact the blocking case. This seems to me like
a different use-case.

Thanks,

-Jason


> This patch removes the 'noblock' variable since we must always
> set SOCK_NOSPACE if -EAGAIN is returned.
> 
> It also renames the do_nonblock label since we might reach this
> code path even if we were in blocking mode.
> 
> Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jason Baron <jbaron@akamai.com>
> Reported-by: Vladimir Rutsky  <rutsky@google.com>
> ---
>  net/core/stream.c | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/net/core/stream.c b/net/core/stream.c
> index e94bb02a56295ec2db34ab423a8c7c890df0a696..4f1d4aa5fb38d989a9c81f32dfce3f31bbc1fa47 100644
> --- a/net/core/stream.c
> +++ b/net/core/stream.c
> @@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
>  	int err = 0;
>  	long vm_wait = 0;
>  	long current_timeo = *timeo_p;
> -	bool noblock = (*timeo_p ? false : true);
>  	DEFINE_WAIT_FUNC(wait, woken_wake_function);
>  
>  	if (sk_stream_memory_free(sk))
> @@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
>  
>  		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
>  			goto do_error;
> -		if (!*timeo_p) {
> -			if (noblock)
> -				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
> -			goto do_nonblock;
> -		}
> +		if (!*timeo_p)
> +			goto do_eagain;
>  		if (signal_pending(current))
>  			goto do_interrupted;
>  		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
> @@ -169,7 +165,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
>  do_error:
>  	err = -EPIPE;
>  	goto out;
> -do_nonblock:
> +do_eagain:
> +	/* Make sure that whenever EAGAIN is returned, EPOLLOUT event can
> +	 * be generated later.
> +	 * When TCP receives ACK packets that make room, tcp_check_space()
> +	 * only calls tcp_new_space() if SOCK_NOSPACE is set.
> +	 */
> +	set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
>  	err = -EAGAIN;
>  	goto out;
>  do_interrupted:
> 

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Alexei Starovoitov @ 2019-08-17 15:02 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Jordan Glover, Andy Lutomirski, Daniel Colascione, Song Liu,
	Kees Cook, Networking, bpf, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Lorenz Bauer, Jann Horn, Greg KH, Linux API,
	LSM List
In-Reply-To: <alpine.DEB.2.21.1908162211270.1923@nanos.tec.linutronix.de>

On Fri, Aug 16, 2019 at 10:28:29PM +0200, Thomas Gleixner wrote:
> Alexei,
> 
> On Fri, 16 Aug 2019, Alexei Starovoitov wrote:
> > It's both of the above when 'systemd' is not taken literally.
> > To earlier Thomas's point: the use case is not only about systemd.
> > There are other containers management systems.
> 
> <SNIP>
> 
> > These daemons need to drop privileges to make the system safer == less
> > prone to corruption due to bugs in themselves. Not necessary security
> > bugs.
> 
> Let's take a step back.
> 
> While real usecases are helpful to understand a design decision, the design
> needs to be usecase independent.
> 
> The kernel provides mechanisms, not policies. My impression of this whole
> discussion is that it is policy driven. That's the wrong approach.

Not sure what you mean by 'policy driven'.
Is the proposed CAP_BPF a policy?

My desire to do kernel.unprivileged_bpf_disabled=1 is driven by
text in Documentation/x86/mds.rst which says:
"There is one exception, which is untrusted BPF. The functionality of
untrusted BPF is limited, but it needs to be thoroughly investigated
whether it can be used to create such a construct."

commit 6a9e52927251 ("x86/speculation/mds: Add mds_clear_cpu_buffers()")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Jon Masters <jcm@redhat.com>
Tested-by: Jon Masters <jcm@redhat.com>

The way I read this text:
- there is a concern that mds is exploitable via bpf
- there is a desire to investigate to address this concern

I'm committed to help with the investigation.

In the meantime I propose a path to setting
kernel.unprivileged_bpf_disabled=1, which is CAP_BPF.

Can kernel.unprivileged_bpf_disabled=1 be used now?
Yes, but it will weaken overall system security because things that
use unpriv to load bpf and CAP_NET_ADMIN to attach bpf would need
to move to stronger CAP_SYS_ADMIN.

With CAP_BPF both load and attach would happen under CAP_BPF
instead of CAP_SYS_ADMIN.

> So let's look at the mechanisms which we have at hand:
> 
>  1) Capabilities
>  
>  2) SUID and dropping privileges
> 
>  3) Seccomp and LSM
> 
> Now the real interesting questions are:
> 
>  A) What kind of restrictions does BPF allow? Is it a binary on/off or is
>     there a more finegrained control of BPF functionality?
> 
>     TBH, I can't tell.
> 
>  B) Depending on the answer to #A what is the control possibility for
>     #1/#2/#3 ?

Can any of the mechanisms 1/2/3 address the concern in mds.rst?

I believe Andy wants to expand the attack surface when
kernel.unprivileged_bpf_disabled=0
Before that happens I'd like the community to work on addressing the text above.

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Alexei Starovoitov @ 2019-08-17 15:08 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
	bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
	Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190816222252.a7zizw7azkxnv3ot@wittgenstein>

On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
> 
> (The one usecase I'd care about is to extend seccomp to do pointer-based
> syscall filtering. Whether or not that'd require (unprivileged) ebpf is
> up for discussion at KSummit.)

Kees have been always against using ebpf in seccomp. I believe he still
holds this opinion. Until he changes his mind let's stop bringing seccomp
as a use case for unpriv bpf.


^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Christian Brauner @ 2019-08-17 15:16 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
	bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
	Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190817150843.4vsmzpwpcvzndjld@ast-mbp>

On August 17, 2019 5:08:45 PM GMT+02:00, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
>> 
>> (The one usecase I'd care about is to extend seccomp to do
>pointer-based
>> syscall filtering. Whether or not that'd require (unprivileged) ebpf
>is
>> up for discussion at KSummit.)
>
>Kees have been always against using ebpf in seccomp. I believe he still
>holds this opinion. Until he changes his mind let's stop bringing
>seccomp
>as a use case for unpriv bpf.

That's why I said "whether or not".
For the record, I do prefer a non-unpriv-ebpf way.
It's still something that will most surely come up in the discussion though.

^ permalink raw reply

* Re: kernel BUG at include/linux/skbuff.h:LINE! (2)
From: syzbot @ 2019-08-17 15:28 UTC (permalink / raw)
  To: andriy.shevchenko, davem, edumazet, f.fainelli, idosch,
	kimbrownkd, linux-kernel, linux-sctp, marcelo.leitner, netdev,
	nhorman, syzkaller-bugs, tglx, vyasevich, wanghai26, yuehaibing
In-Reply-To: <0000000000008182a50590404a02@google.com>

syzbot has bisected this bug to:

commit bc389fd101e57b36aacfaec2df8fe479eabb44ea
Author: David S. Miller <davem@davemloft.net>
Date:   Tue Jul 2 21:12:30 2019 +0000

     Merge branch 'macsec-fix-some-bugs-in-the-receive-path'

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=125c5c4c600000
start commit:   459c5fb4 Merge branch 'mscc-PTP-support'
git tree:       net-next
final crash:    https://syzkaller.appspot.com/x/report.txt?x=115c5c4c600000
console output: https://syzkaller.appspot.com/x/log.txt?x=165c5c4c600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=d4cf1ffb87d590d7
dashboard link: https://syzkaller.appspot.com/bug?extid=eb349eeee854e389c36d
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=111849e2600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1442c25a600000

Reported-by: syzbot+eb349eeee854e389c36d@syzkaller.appspotmail.com
Fixes: bc389fd101e5 ("Merge branch 'macsec-fix-some-bugs-in-the-receive-path'")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Alexei Starovoitov @ 2019-08-17 15:36 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
	bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
	Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <61B88085-9FBB-41E6-9783-324E445E428D@ubuntu.com>

On Sat, Aug 17, 2019 at 05:16:53PM +0200, Christian Brauner wrote:
> On August 17, 2019 5:08:45 PM GMT+02:00, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
> >> 
> >> (The one usecase I'd care about is to extend seccomp to do
> >pointer-based
> >> syscall filtering. Whether or not that'd require (unprivileged) ebpf
> >is
> >> up for discussion at KSummit.)
> >
> >Kees have been always against using ebpf in seccomp. I believe he still
> >holds this opinion. Until he changes his mind let's stop bringing
> >seccomp
> >as a use case for unpriv bpf.
> 
> That's why I said "whether or not".
> For the record, I do prefer a non-unpriv-ebpf way.
> It's still something that will most surely come up in the discussion though.

It's very un-kernely way to defer to in-person meetings.
If there is anything to discuss please discuss it on the public mailing list.


^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Christian Brauner @ 2019-08-17 15:42 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
	bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
	Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190817153652.zfcsklt474j72dzm@ast-mbp.dhcp.thefacebook.com>

On August 17, 2019 5:36:54 PM GMT+02:00, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>On Sat, Aug 17, 2019 at 05:16:53PM +0200, Christian Brauner wrote:
>> On August 17, 2019 5:08:45 PM GMT+02:00, Alexei Starovoitov
><alexei.starovoitov@gmail.com> wrote:
>> >On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
>> >> 
>> >> (The one usecase I'd care about is to extend seccomp to do
>> >pointer-based
>> >> syscall filtering. Whether or not that'd require (unprivileged)
>ebpf
>> >is
>> >> up for discussion at KSummit.)
>> >
>> >Kees have been always against using ebpf in seccomp. I believe he
>still
>> >holds this opinion. Until he changes his mind let's stop bringing
>> >seccomp
>> >as a use case for unpriv bpf.
>> 
>> That's why I said "whether or not".
>> For the record, I do prefer a non-unpriv-ebpf way.
>> It's still something that will most surely come up in the discussion
>though.
>
>It's very un-kernely way to defer to in-person meetings.
>If there is anything to discuss please discuss it on the public mailing
>list.

https://lists.linuxfoundation.org/pipermail/ksummit-discuss/2019-July/006699.html

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Andy Lutomirski @ 2019-08-17 15:44 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Thomas Gleixner, Jordan Glover, Andy Lutomirski,
	Daniel Colascione, Song Liu, Kees Cook, Networking, bpf,
	Alexei Starovoitov, Daniel Borkmann, Kernel Team, Lorenz Bauer,
	Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190817150245.xxzxqjpvgqsxmloe@ast-mbp>

> On Aug 17, 2019, at 8:02 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> 
> Can any of the mechanisms 1/2/3 address the concern in mds.rst?
> 

seccomp() can. It’s straightforward to use seccomp to disable bpf() outright for a process tree.  In this regard, bpf() isn’t particularly unique — it’s a system call that exposes some attack surface and that isn’t required by most programs for basic functionality.

At LPC this year, there will be a discussion about seccomp improvements that will, among other things, offer finer-grained control. It’s quite likely, for example, that seccomp will soon be able to enable and disable specific map types or attach types.  The exact mechanism isn’t decided yet,  but I think everyone expects that this is mostly a design problem, not an implementation problem.

This is off topic for the current thread, but it could be useful to allow bpf programs to be loaded from files directly (i.e. pass an fd to a file into bpf() to load the program), which would enable LSMs to check that the file is appropriately labeled. This would dramatically raise the bar for exploitation of verifier bugs or speculation attacks, since anyone trying to exploit it would need to get the bpf payload through LSM policy first.

> I believe Andy wants to expand the attack surface when
> kernel.unprivileged_bpf_disabled=0
> Before that happens I'd like the community to work on addressing the text above.
> 

Not by much. BPF maps are already largely exposed to unprivileged code (when unprivileged_bpf_disabled=0).  The attack surface is there, and they’re arguably even more exposed than they should be.  My patch 1 earlier was about locking these interfaces down.

Similarly, my suggestions about reworking cgroup attach and program load don’t actually allow fully unprivileged users to run arbitrary bpf() programs [0] — under my proposal, to attach a bpf cgroup program, you need a delegated cgroup. The mechanism could be extended by a requirement that a privileged cgroup manager explicitly enable certain attach types for a delegated subtree.

A cgroup knob to turn unprivileged bpf on and off for tasks in the cgroup might actually be quite useful.

[0] on some thought, the test run mechanism should probably remain root-only.

^ permalink raw reply

* Re: [PATCH 1/2] PTP: introduce new versions of IOCTLs
From: Richard Cochran @ 2019-08-17 15:59 UTC (permalink / raw)
  To: Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190814074712.10684-1-felipe.balbi@linux.intel.com>

On Wed, Aug 14, 2019 at 10:47:11AM +0300, Felipe Balbi wrote:
> The current version of the IOCTL have a small problem which prevents us
> from extending the API by making use of reserved fields. In these new
> IOCTLs, we are now making sure that flags and rsv fields are zero which
> will allow us to extend the API in the future.
> 
> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
> ---
>  drivers/ptp/ptp_chardev.c      | 58 ++++++++++++++++++++++++++++++++--
>  include/uapi/linux/ptp_clock.h | 12 +++++++
>  2 files changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 18ffe449efdf..204212fc3f8c 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -123,9 +123,11 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  	struct timespec64 ts;
>  	int enable, err = 0;
>  
> +	memset(&req, 0, sizeof(req));

Nit: please leave a blank line between memset() and switch/case.

>  	switch (cmd) {
>  
>  	case PTP_CLOCK_GETCAPS:
> +	case PTP_CLOCK_GETCAPS2:
>  		memset(&caps, 0, sizeof(caps));
>  		caps.max_adj = ptp->info->max_adj;
>  		caps.n_alarm = ptp->info->n_alarm;

Reviewed-by: Richard Cochran <richardcochran@gmail.com>


^ permalink raw reply

* Re: [PATCH 2/2] PTP: add support for one-shot output
From: Richard Cochran @ 2019-08-17 16:03 UTC (permalink / raw)
  To: Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190814074712.10684-2-felipe.balbi@linux.intel.com>

On Wed, Aug 14, 2019 at 10:47:12AM +0300, Felipe Balbi wrote:
> diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
> index 039cd62ec706..9412b16cc8ed 100644
> --- a/include/uapi/linux/ptp_clock.h
> +++ b/include/uapi/linux/ptp_clock.h
> @@ -67,7 +67,9 @@ struct ptp_perout_request {
>  	struct ptp_clock_time start;  /* Absolute start time. */
>  	struct ptp_clock_time period; /* Desired period, zero means disable. */
>  	unsigned int index;           /* Which channel to configure. */
> -	unsigned int flags;           /* Reserved for future use. */
> +
> +#define PTP_PEROUT_ONE_SHOT BIT(0)
> +	unsigned int flags;           /* Bit 0 -> oneshot output. */

The .flags field doesn't need this comment.  The individual BIT macro
names should be clear enough, and if not, then comment the macros.

>  	unsigned int rsv[4];          /* Reserved for future use. */
>  };
>  
> -- 
> 2.22.0
> 

Reviewed-by: Richard Cochran <richardcochran@gmail.com>

^ permalink raw reply

* Re: [PATCH 1/2] PTP: introduce new versions of IOCTLs
From: Joe Perches @ 2019-08-17 16:17 UTC (permalink / raw)
  To: Richard Cochran, Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190817155927.GA1540@localhost>

On Sat, 2019-08-17 at 08:59 -0700, Richard Cochran wrote:
> On Wed, Aug 14, 2019 at 10:47:11AM +0300, Felipe Balbi wrote:
> > The current version of the IOCTL have a small problem which prevents us
> > from extending the API by making use of reserved fields. In these new
> > IOCTLs, we are now making sure that flags and rsv fields are zero which
> > will allow us to extend the API in the future.
> > 
> > Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
> > ---
> >  drivers/ptp/ptp_chardev.c      | 58 ++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/ptp_clock.h | 12 +++++++
> >  2 files changed, 68 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
[]
> > @@ -123,9 +123,11 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
> >  	struct timespec64 ts;
> >  	int enable, err = 0;
> >  
> > +	memset(&req, 0, sizeof(req));
> 
> Nit: please leave a blank line between memset() and switch/case.

or just initialize the declaration of req with = {}

Is there a case where this initialization is
unnecessary such that it impacts performance
given the use in ptp_ioctl?

caps for instance is memset to zero only in
PTP_CLOCK_GETCAP

req is used in only 3 of the case blocks.

	case PTP_EXTTS_REQUEST:
	case PTP_PEROUT_REQUEST:
	case PTP_ENABLE_PPS:

Maybe it would be better to move the memset(&req...)
into each of the case blocks.

> >  	switch (cmd) {
> >  
> >  	case PTP_CLOCK_GETCAPS:
> > +	case PTP_CLOCK_GETCAPS2:
> >  		memset(&caps, 0, sizeof(caps));
> >  		caps.max_adj = ptp->info->max_adj;
> >  		caps.n_alarm = ptp->info->n_alarm;
> 
> Reviewed-by: Richard Cochran <richardcochran@gmail.com>
> 


^ permalink raw reply

* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: Eric Dumazet @ 2019-08-17 16:26 UTC (permalink / raw)
  To: Jason Baron, Eric Dumazet, David S . Miller
  Cc: netdev, Soheil Hassas Yeganeh, Neal Cardwell, Eric Dumazet,
	Vladimir Rutsky
In-Reply-To: <b9ab6b03-664c-eb81-0fbd-6f696276d9aa@akamai.com>



On 8/17/19 4:19 PM, Jason Baron wrote:
> 
> 
> On 8/17/19 12:26 AM, Eric Dumazet wrote:
>> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
>> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
>> when needed.
>>
>> However, Jason patch had a bug, because the 'nonblocking' status
>> as far as sk_stream_wait_memory() is concerned is governed
>> by MSG_DONTWAIT flag passed at sendmsg() time :
>>
>>     long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
>>
>> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
>> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
>> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
>> value.
> 
> Is MSG_DONTWAIT not set in this case? The original patch was intended
> only for the explicit non-blocking case. The epoll manpage says:
> "EPOLLET flag should use nonblocking file descriptors". So the original
> intention was not to impact the blocking case. This seems to me like
> a different use-case.
>

I guess the problem is how we define 'non-blocking' ...

SO_SNDTIMEO can be used by application to implement a variation of non-blocking,
by waiting for a socket event with a short timeout, to maybe recover
from memory pressure conditions in a more efficient way than simply looping.

Note that the man page for epoll() only _suggests_ to use nonblocking file descriptors.

<quote>
       The  suggested  way  to use epoll as an edge-triggered (EPOLLET)
       interface is as follows:

              i   with nonblocking file descriptors; and

              ii  by  waiting  for  an  event  only  after  read(2)  or
                  write(2) return EAGAIN.
</quote>









^ permalink raw reply

* Re: 5.3-rc3-ish VM crash: RIP: 0010:tcp_trim_head+0x20/0xe0
From: Eric Dumazet @ 2019-08-17 16:35 UTC (permalink / raw)
  To: Sander Eikelenboom, Eric Dumazet, netdev, linux-kernel
In-Reply-To: <674de4ab-c37f-7787-f95a-3ae0f52bc196@eikelenboom.it>



On 8/17/19 10:24 AM, Sander Eikelenboom wrote:
> On 12/08/2019 19:56, Eric Dumazet wrote:
>>
>>
>> On 8/12/19 2:50 PM, Sander Eikelenboom wrote:
>>> L.S.,
>>>
>>> While testing a somewhere-after-5.3-rc3 kernel (which included the latest net merge (33920f1ec5bf47c5c0a1d2113989bdd9dfb3fae9),
>>> one of my Xen VM's (which gets quite some network load) crashed.
>>> See below for the stacktrace.
>>>
>>> Unfortunately I haven't got a clear trigger, so bisection doesn't seem to be an option at the moment. 
>>> I haven't encountered this on 5.2, so it seems to be an regression against 5.2.
>>>
>>> Any ideas ?
>>>
>>> --
>>> Sander
>>>
>>>
>>> [16930.653595] general protection fault: 0000 [#1] SMP NOPTI
>>> [16930.653624] CPU: 0 PID: 3275 Comm: rsync Not tainted 5.3.0-rc3-20190809-doflr+ #1
>>> [16930.653657] RIP: 0010:tcp_trim_head+0x20/0xe0
>>> [16930.653677] Code: 2e 0f 1f 84 00 00 00 00 00 90 41 54 41 89 d4 55 48 89 fd 53 48 89 f3 f6 46 7e 01 74 2f 8b 86 bc 00 00 00 48 03 86 c0 00 00 00 <8b> 40 20 66 83 f8 01 74 19 31 d2 31 f6 b9 20 0a 00 00 48 89 df e8
>>> [16930.653741] RSP: 0000:ffffc90000003ad8 EFLAGS: 00010286
>>> [16930.653762] RAX: fffe888005bf62c0 RBX: ffff8880115fb800 RCX: 000000008010000b
>>
>> crash in " mov    0x20(%rax),%eax"   and RAX=fffe888005bf62c0 (not a valid kernel address)
>>
>> Look like one bit corruption maybe.
>>
>> Nothing comes to mind really between 5.2 and 5.3 that could explain this.
>>
>>> [16930.653791] RDX: 00000000000005a0 RSI: ffff8880115fb800 RDI: ffff888016b00880
>>> [16930.653819] RBP: ffff888016b00880 R08: 0000000000000001 R09: 0000000000000000
>>> [16930.653848] R10: ffff88800ae00800 R11: 00000000bfe632e6 R12: 00000000000005a0
>>> [16930.653875] R13: 0000000000000001 R14: 00000000bfe62d46 R15: 0000000000000004
>>> [16930.653913] FS:  00007fe71fe2cb80(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
>>> [16930.653943] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [16930.653965] CR2: 000055de0f3e7000 CR3: 0000000011f32000 CR4: 00000000000006f0
>>> [16930.653993] Call Trace:
>>> [16930.654005]  <IRQ>
>>> [16930.654018]  tcp_ack+0xbb0/0x1230
>>> [16930.654033]  tcp_rcv_established+0x2e8/0x630
>>> [16930.654053]  tcp_v4_do_rcv+0x129/0x1d0
>>> [16930.654070]  tcp_v4_rcv+0xac9/0xcb0
>>> [16930.654088]  ip_protocol_deliver_rcu+0x27/0x1b0
>>> [16930.654109]  ip_local_deliver_finish+0x3f/0x50
>>> [16930.654128]  ip_local_deliver+0x4d/0xe0
>>> [16930.654145]  ? ip_protocol_deliver_rcu+0x1b0/0x1b0
>>> [16930.654163]  ip_rcv+0x4c/0xd0
>>> [16930.654179]  __netif_receive_skb_one_core+0x79/0x90
>>> [16930.654200]  netif_receive_skb_internal+0x2a/0xa0
>>> [16930.654219]  napi_gro_receive+0xe7/0x140
>>> [16930.654237]  xennet_poll+0x9be/0xae0
>>> [16930.654254]  net_rx_action+0x136/0x340
>>> [16930.654271]  __do_softirq+0xdd/0x2cf
>>> [16930.654287]  irq_exit+0x7a/0xa0
>>> [16930.654304]  xen_evtchn_do_upcall+0x27/0x40
>>> [16930.654320]  xen_hvm_callback_vector+0xf/0x20
>>> [16930.654339]  </IRQ>
>>> [16930.654349] RIP: 0033:0x55de0d87db99
>>> [16930.654364] Code: 00 00 48 89 7c 24 f8 45 39 fe 45 0f 42 fe 44 89 7c 24 f4 eb 09 0f 1f 40 00 83 e9 01 74 3e 89 f2 48 63 f8 4c 01 d2 44 38 1c 3a <75> 25 44 38 6c 3a ff 75 1e 41 0f b6 3c 24 40 38 3a 75 14 41 0f b6
>>> [16930.654432] RSP: 002b:00007ffd5531eec8 EFLAGS: 00000a87 ORIG_RAX: ffffffffffffff0c
>>> [16930.655004] RAX: 0000000000000002 RBX: 000055de0f3e8e50 RCX: 000000000000007f
>>> [16930.655034] RDX: 000055de0f3dc2d2 RSI: 0000000000003492 RDI: 0000000000000002
>>> [16930.655062] RBP: 0000000000007fff R08: 00000000000080ea R09: 00000000000001f0
>>> [16930.655089] R10: 000055de0f3d8e40 R11: 0000000000000094 R12: 000055de0f3e0f2a
>>> [16930.655116] R13: 0000000000000010 R14: 0000000000007f16 R15: 0000000000000080
>>> [16930.655144] Modules linked in:
>>> [16930.655200] ---[ end trace 533367c95501b645 ]---
>>> [16930.655223] RIP: 0010:tcp_trim_head+0x20/0xe0
>>> [16930.655243] Code: 2e 0f 1f 84 00 00 00 00 00 90 41 54 41 89 d4 55 48 89 fd 53 48 89 f3 f6 46 7e 01 74 2f 8b 86 bc 00 00 00 48 03 86 c0 00 00 00 <8b> 40 20 66 83 f8 01 74 19 31 d2 31 f6 b9 20 0a 00 00 48 89 df e8
>>> [16930.655312] RSP: 0000:ffffc90000003ad8 EFLAGS: 00010286
>>> [16930.655331] RAX: fffe888005bf62c0 RBX: ffff8880115fb800 RCX: 000000008010000b
>>> [16930.655360] RDX: 00000000000005a0 RSI: ffff8880115fb800 RDI: ffff888016b00880
>>> [16930.655387] RBP: ffff888016b00880 R08: 0000000000000001 R09: 0000000000000000
>>> [16930.655414] R10: ffff88800ae00800 R11: 00000000bfe632e6 R12: 00000000000005a0
>>> [16930.655441] R13: 0000000000000001 R14: 00000000bfe62d46 R15: 0000000000000004
>>> [16930.655475] FS:  00007fe71fe2cb80(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
>>> [16930.655502] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [16930.655525] CR2: 000055de0f3e7000 CR3: 0000000011f32000 CR4: 00000000000006f0
>>> [16930.655553] Kernel panic - not syncing: Fatal exception in interrupt
>>> [16930.655789] Kernel Offset: disabled
>>>
> 
> Hi Eric,
> 
> Got another VM crash, with a slightly different stacktrace this time around.
> Still networking though.
> 
> --
> Sander
> 
> [112522.697498] general protection fault: 0000 [#1] SMP NOPTI
> [112522.697555] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc4-20190812-doflr+ #1
> [112522.697592] RIP: 0010:skb_shift+0x63/0x430
> [112522.697608] Code: bc 00 00 00 48 03 8f c0 00 00 00 f6 41 03 08 74 07 48 83 79 28 00 75 d0 8b 8e bc 00 00 00 48 03 8e c0 00 00 00 48 85 f6 74 0a <f6> 41 03 08 0f 85 09 03 00 00 49 89 fd 8b bf bc 00 00 00 41 89 



crash in "testb  $0x8,0x3(%rcx)"  with RCX==fffe8880117da6c0

Same strange looking address on x86_64

I have no idea.

> [112522.697673] RSP: 0018:ffffc900000039b0 EFLAGS: 00010286
> [112522.697693] RAX: 00000000000005a0 RBX: ffff8880117fb800 RCX: fffe8880117da6c0
> [112522.697721] RDX: 00000000000005a0 RSI: ffff8880117fb800 RDI: ffff88800ae58000
> [112522.697748] RBP: ffffc900000039e8 R08: 000000000004cfe0 R09: 00000000000005a0
> [112522.697775] R10: 00000000000005a0 R11: ffff8880117fb800 R12: 0000000000000000
> [112522.697803] R13: 00000000c95a98c2 R14: 0000000000000000 R15: ffff88800ae58000
> [112522.697839] FS:  0000000000000000(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
> [112522.697869] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [112522.697895] CR2: 00007f9210d8e078 CR3: 000000000b660000 CR4: 00000000000006f0
> [112522.697925] Call Trace:
> [112522.697938]  <IRQ>
> [112522.697951]  tcp_sacktag_walk+0x2af/0x480
> [112522.697967]  tcp_sacktag_write_queue+0x34d/0x820
> [112522.697986]  ? ip_forward_options.cold.0+0x1c/0x1c
> [112522.698007]  tcp_ack+0xb8c/0x1230
> [112522.698023]  ? tcp_event_new_data_sent+0x4a/0x90
> [112522.698043]  tcp_rcv_established+0x14c/0x630
> [112522.698064]  tcp_v4_do_rcv+0x129/0x1d0
> [112522.698081]  tcp_v4_rcv+0xac9/0xcb0
> [112522.698099]  ip_protocol_deliver_rcu+0x27/0x1b0
> [112522.698119]  ip_local_deliver_finish+0x3f/0x50
> [112522.698139]  ip_local_deliver+0x4d/0xe0
> [112522.698155]  ? ip_protocol_deliver_rcu+0x1b0/0x1b0
> [112522.698177]  ip_rcv+0x4c/0xd0
> [112522.698194]  __netif_receive_skb_one_core+0x79/0x90
> [112522.698215]  netif_receive_skb_internal+0x2a/0xa0
> [112522.698237]  napi_gro_receive+0xe7/0x140
> [112522.698255]  xennet_poll+0x9be/0xae0
> [112522.698271]  net_rx_action+0x136/0x340
> [112522.698288]  __do_softirq+0xdd/0x2cf
> [112522.698304]  irq_exit+0x7a/0xa0
> [112522.698321]  xen_evtchn_do_upcall+0x27/0x40
> [112522.698340]  xen_hvm_callback_vector+0xf/0x20
> [112522.698359]  </IRQ>
> [112522.698373] RIP: 0010:native_safe_halt+0xe/0x10
> [112522.698392] Code: 48 8b 04 25 c0 6b 01 00 f0 80 48 02 20 48 8b 00 a8 08 75 c4 eb 80 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 54 fb 41 00 fb f4 <c3> 90 e9 07 00 00 00 0f 00 2d 44 fb 41 00 f4 c3 90 90 41 55 41 54
> [112522.699522] RSP: 0018:ffffffff82a03e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff0c
> [112522.699552] RAX: 0001a54800000000 RBX: 0000000000000000 RCX: 0000000000000001
> [112522.699580] RDX: 0000000002b9f9b6 RSI: 0000000000000087 RDI: 0000000000000000
> [112522.699608] RBP: 0000000000000000 R08: 000000001eb5c3cb R09: ffffffff82a08460
> [112522.699634] R10: 000000000002e46e R11: 0000000000000000 R12: 0000000000000000
> [112522.699662] R13: 0000000000000000 R14: ffffffff8326e0a0 R15: 0000000000000000
> [112522.699692]  default_idle+0x17/0x140
> [112522.699709]  do_idle+0x1ee/0x210
> [112522.699726]  cpu_startup_entry+0x14/0x20
> [112522.699743]  start_kernel+0x4e9/0x50b
> [112522.699760]  secondary_startup_64+0xa4/0xb0
> [112522.699780] Modules linked in:
> [112522.699829] ---[ end trace 3b8db3603485e952 ]---
> [112522.699850] RIP: 0010:skb_shift+0x63/0x430
> [112522.699866] Code: bc 00 00 00 48 03 8f c0 00 00 00 f6 41 03 08 74 07 48 83 79 28 00 75 d0 8b 8e bc 00 00 00 48 03 8e c0 00 00 00 48 85 f6 74 0a <f6> 41 03 08 0f 85 09 03 00 00 49 89 fd 8b bf bc 00 00 00 41 89 d4
> [112522.699938] RSP: 0018:ffffc900000039b0 EFLAGS: 00010286
> [112522.699959] RAX: 00000000000005a0 RBX: ffff8880117fb800 RCX: fffe8880117da6c0
> [112522.699986] RDX: 00000000000005a0 RSI: ffff8880117fb800 RDI: ffff88800ae58000
> [112522.700013] RBP: ffffc900000039e8 R08: 000000000004cfe0 R09: 00000000000005a0
> [112522.700041] R10: 00000000000005a0 R11: ffff8880117fb800 R12: 0000000000000000
> [112522.700067] R13: 00000000c95a98c2 R14: 0000000000000000 R15: ffff88800ae58000
> [112522.700111] FS:  0000000000000000(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
> [112522.700140] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [112522.700165] CR2: 00007f9210d8e078 CR3: 000000000b660000 CR4: 00000000000006f0
> [112522.700201] Kernel panic - not syncing: Fatal exception in interrupt
> [112522.702992] Kernel Offset: disabled
> 
> 

^ permalink raw reply

* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: Neal Cardwell @ 2019-08-17 17:10 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, netdev, Soheil Hassas Yeganeh, Eric Dumazet,
	Jason Baron, Vladimir Rutsky
In-Reply-To: <20190817042622.91497-1-edumazet@google.com>

On Sat, Aug 17, 2019 at 12:26 AM Eric Dumazet <edumazet@google.com> wrote:
>
> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
> when needed.
>
> However, Jason patch had a bug, because the 'nonblocking' status
> as far as sk_stream_wait_memory() is concerned is governed
> by MSG_DONTWAIT flag passed at sendmsg() time :
>
>     long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
>
> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
> value.
>
> This patch removes the 'noblock' variable since we must always
> set SOCK_NOSPACE if -EAGAIN is returned.
>
> It also renames the do_nonblock label since we might reach this
> code path even if we were in blocking mode.
>
> Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jason Baron <jbaron@akamai.com>
> Reported-by: Vladimir Rutsky  <rutsky@google.com>
> ---
>  net/core/stream.c | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)

Acked-by: Neal Cardwell <ncardwell@google.com>

Thanks, Eric!

neal

^ permalink raw reply

* Re: [PATCH RFC net-next 3/3] net: dsa: mv88e6xxx: setup SERDES irq also for CPU/DSA ports
From: Marek Behun @ 2019-08-17 18:03 UTC (permalink / raw)
  To: Vivien Didelot; +Cc: netdev, Andrew Lunn, Vladimir Oltean, Florian Fainelli
In-Reply-To: <20190816190537.GB14714@t480s.localdomain>

Hi Vivien,

On Fri, 16 Aug 2019 19:05:37 -0400
Vivien Didelot <vivien.didelot@gmail.com> wrote:

> I think the DSA switch port_setup/port_teardown operations are fine, but the
> idea would be that the drivers must no longer setup their ports directly
> in their .setup function. So for mv88e6xxx precisely, we should rename
> mv88e6xxx_setup_port to mv88e6xxx_port_setup, and move all the port-related
> code from mv88e6xxx_setup into mv88e6xxx_port_setup.

I looked into the driver, and found out that mv88e6xxx_setup calls many
other setup functions after the calls to mv88e6xxx_setup_port for each
port:
   1. setup errata
   2. cache cmode
   3. for each port setup_port
   4. irl setup
   5. mac setup
   6. phy setup
   7. vtu setup
   8. pvt setup
   9. atu setup
  10. broadcast setup
  11. pot setup
  12. rmu setup
  13. rsvd2cpu setup
  14. trunk setup
  15. devmap setup
  16. pri setup
  17. ptp setup
  18. hwtstamp setup
  19. stats setup

The problem is that some of these steps (after step 3) may depend on
some of the work done by step 3. Some of these functions iterate again
over the port array (mv88e6xxx_hwtstamp_setup, for example).
We cannot simply move step 3 to be called from DSA after
mv88e6xxx_setup.

I now do not know exactly what to do about the error prone naming of
setup_port vs port_setup.

One way would be to rename the mv88e6xxx_setup_port function to
mv88e6xxx_setup_port_regs, or mv88e6xxx_port_pre_setup, or something
like that. Would the names mv88e6xxx_port_setup and
mv88e6xxx_setup_port_regs still be very confusing and error prone?
I think maybe yes...

Other solution would be to, instead of the .port_setup()
and .port_teardown() DSA ops, create the .after_setup()
and .before_teardown() ops I mentioned in the previous mail.

And yet another (in my opinion very improper) solution could be that
the .setup() method could call dsa_port_setup() from within itself, to
ensure that the needed structures exist.

Please let me know what you think about this.

The first solution to me currently seems as the easiest.

Marek

^ permalink raw reply

* Re: [PATCH RFC net-next 3/3] net: dsa: mv88e6xxx: setup SERDES irq also for CPU/DSA ports
From: Marek Behun @ 2019-08-17 18:15 UTC (permalink / raw)
  To: Vivien Didelot; +Cc: netdev, Andrew Lunn, Vladimir Oltean, Florian Fainelli
In-Reply-To: <20190817200342.567c13c4@nic.cz>

On Sat, 17 Aug 2019 20:03:42 +0200
Marek Behun <marek.behun@nic.cz> wrote:

> One way would be to rename the mv88e6xxx_setup_port function to
> mv88e6xxx_setup_port_regs, or mv88e6xxx_port_pre_setup, or something
> like that. Would the names mv88e6xxx_port_setup and
> mv88e6xxx_setup_port_regs still be very confusing and error prone?
> I think maybe yes...
> 
> Other solution would be to, instead of the .port_setup()
> and .port_teardown() DSA ops, create the .after_setup()
> and .before_teardown() ops I mentioned in the previous mail.
> 
> And yet another (in my opinion very improper) solution could be that
> the .setup() method could call dsa_port_setup() from within itself, to
> ensure that the needed structures exist.

I thought of another solution, one that does not need new DSA
operations. What if dsa_port_enable was called for CPU/DSA ports
in dsa_port_setup_switches, after all ports are set up, and
dsa_port_disable called for CPU/DSA ports in dsa_port_teardown_switches?

This seems to me as cleaner solution.

Marek

^ permalink raw reply

* Re: Unable to create htb tc classes more than 64K
From: Cong Wang @ 2019-08-17 18:24 UTC (permalink / raw)
  To: Akshat Kakkar; +Cc: NetFilter, lartc, netdev
In-Reply-To: <CAA5aLPiqyhnWjY7A3xsaNJ71sDOf=Rqej8d+7=_PyJPmV9uApA@mail.gmail.com>

On Sat, Aug 17, 2019 at 5:46 AM Akshat Kakkar <akshat.1984@gmail.com> wrote:
>
> I agree that it is because of 16bit of minor ID of class which
> restricts it to 64K.
> Point is, can we use multilevel qdisc and classes to extend it to more
> no. of classes i.e. to more than 64K classes

If your goal is merely having as many classes as you can, then yes.


>
> One scheme can be like
>                                       100: root qdisc
>                                          |
>                                        / | \
>                                      /   |   \
>                                    /     |     \
>                                  /       |       \
>                           100:1   100:2   100:3        child classes
>                             |              |           |
>                             |              |           |
>                             |              |           |
>                            1:            2:          3:     qdisc
>                            / \           / \           / \
>                          /     \                     /     \
>                       1:1    1:2             3:1      3:2 leaf classes
>
> with all qdisc and classes defined as htb.
>
> Is this correct approach? Any alternative??

Again, depends on what your goal is.


>
> Besides, in order to direct traffic to leaf classes 1:1, 1:2, 2:1,
> 2:2, 3:1, 3:2 .... , instead of using filters I am using ipset with
> skbprio and iptables map-set match rule.
> But even after all this it don't work. Why?

Again, the filters you use to classify the packets could only
work for the classes on the same level, not the next level.


Thanks.

^ permalink raw reply

* [PATCH net-next v3 06/12] net: stmmac: dwxgmac: Add Flexible PPS support
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
  To: netdev
  Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
	Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
	linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>

Add the support for Flexible PPS in XGMAC cores.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>

---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h     | 19 ++++++++
 .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c    | 56 ++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 995d533b9316..dbac63972faf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -149,6 +149,25 @@
 #define XGMAC_TXTIMESTAMP_NSEC		0x00000d30
 #define XGMAC_TXTSSTSLO			GENMASK(30, 0)
 #define XGMAC_TXTIMESTAMP_SEC		0x00000d34
+#define XGMAC_PPS_CONTROL		0x00000d70
+#define XGMAC_PPS_MAXIDX(x)		((((x) + 1) * 8) - 1)
+#define XGMAC_PPS_MINIDX(x)		((x) * 8)
+#define XGMAC_PPSx_MASK(x)		\
+	GENMASK(XGMAC_PPS_MAXIDX(x), XGMAC_PPS_MINIDX(x))
+#define XGMAC_TRGTMODSELx(x, val)	\
+	GENMASK(XGMAC_PPS_MAXIDX(x) - 1, XGMAC_PPS_MAXIDX(x) - 2) & \
+	((val) << (XGMAC_PPS_MAXIDX(x) - 2))
+#define XGMAC_PPSCMDx(x, val)		\
+	GENMASK(XGMAC_PPS_MINIDX(x) + 3, XGMAC_PPS_MINIDX(x)) & \
+	((val) << XGMAC_PPS_MINIDX(x))
+#define XGMAC_PPSCMD_START		0x2
+#define XGMAC_PPSCMD_STOP		0x5
+#define XGMAC_PPSEN0			BIT(4)
+#define XGMAC_PPSx_TARGET_TIME_SEC(x)	(0x00000d80 + (x) * 0x10)
+#define XGMAC_PPSx_TARGET_TIME_NSEC(x)	(0x00000d84 + (x) * 0x10)
+#define XGMAC_TRGTBUSY0			BIT(31)
+#define XGMAC_PPSx_INTERVAL(x)		(0x00000d88 + (x) * 0x10)
+#define XGMAC_PPSx_WIDTH(x)		(0x00000d8c + (x) * 0x10)
 
 /* MTL Registers */
 #define XGMAC_MTL_OPMODE		0x00001000
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index ba5183f38f84..f843e3640f50 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -8,6 +8,7 @@
 #include <linux/crc32.h>
 #include <linux/iopoll.h>
 #include "stmmac.h"
+#include "stmmac_ptp.h"
 #include "dwxgmac2.h"
 
 static void dwxgmac2_core_init(struct mac_device_info *hw,
@@ -1011,6 +1012,60 @@ static int dwxgmac2_get_mac_tx_timestamp(struct mac_device_info *hw, u64 *ts)
 	return 0;
 }
 
+static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index,
+				    struct stmmac_pps_cfg *cfg, bool enable,
+				    u32 sub_second_inc, u32 systime_flags)
+{
+	u32 tnsec = readl(ioaddr + XGMAC_PPSx_TARGET_TIME_NSEC(index));
+	u32 val = readl(ioaddr + XGMAC_PPS_CONTROL);
+	u64 period;
+
+	if (!cfg->available)
+		return -EINVAL;
+	if (tnsec & XGMAC_TRGTBUSY0)
+		return -EBUSY;
+	if (!sub_second_inc || !systime_flags)
+		return -EINVAL;
+
+	val &= ~XGMAC_PPSx_MASK(index);
+
+	if (!enable) {
+		val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_STOP);
+		writel(val, ioaddr + XGMAC_PPS_CONTROL);
+		return 0;
+	}
+
+	val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START);
+	val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START);
+	val |= XGMAC_PPSEN0;
+
+	writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index));
+
+	if (!(systime_flags & PTP_TCR_TSCTRLSSR))
+		cfg->start.tv_nsec = (cfg->start.tv_nsec * 1000) / 465;
+	writel(cfg->start.tv_nsec, ioaddr + XGMAC_PPSx_TARGET_TIME_NSEC(index));
+
+	period = cfg->period.tv_sec * 1000000000;
+	period += cfg->period.tv_nsec;
+
+	do_div(period, sub_second_inc);
+
+	if (period <= 1)
+		return -EINVAL;
+
+	writel(period - 1, ioaddr + XGMAC_PPSx_INTERVAL(index));
+
+	period >>= 1;
+	if (period <= 1)
+		return -EINVAL;
+
+	writel(period - 1, ioaddr + XGMAC_PPSx_WIDTH(index));
+
+	/* Finally, activate it */
+	writel(val, ioaddr + XGMAC_PPS_CONTROL);
+	return 0;
+}
+
 const struct stmmac_ops dwxgmac210_ops = {
 	.core_init = dwxgmac2_core_init,
 	.set_mac = dwxgmac2_set_mac,
@@ -1048,6 +1103,7 @@ const struct stmmac_ops dwxgmac210_ops = {
 	.update_vlan_hash = dwxgmac2_update_vlan_hash,
 	.rxp_config = dwxgmac3_rxp_config,
 	.get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp,
+	.flex_pps_config = dwxgmac2_flex_pps_config,
 };
 
 int dwxgmac2_setup(struct stmmac_priv *priv)
-- 
2.7.4


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox