Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 01/10] selftests: forwarding: Split mirror_gre_topo_lib.sh
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Move generic parts of mirror_gre_topo_lib.sh into a new file
mirror_topo_lib.sh. Reuse the functions in GRE topo, adding the tunnel
devices as necessary.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../net/forwarding/mirror_gre_topo_lib.sh          | 53 ++----------
 .../selftests/net/forwarding/mirror_topo_lib.sh    | 99 ++++++++++++++++++++++
 2 files changed, 108 insertions(+), 44 deletions(-)
 create mode 100644 tools/testing/selftests/net/forwarding/mirror_topo_lib.sh

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index b3ceda2..2534195 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,29 +33,11 @@
 #   |                                                                         |
 #   +-------------------------------------------------------------------------+
 
-mirror_gre_topo_h1_create()
-{
-	simple_if_init $h1 192.0.2.1/28
-}
-
-mirror_gre_topo_h1_destroy()
-{
-	simple_if_fini $h1 192.0.2.1/28
-}
-
-mirror_gre_topo_h2_create()
-{
-	simple_if_init $h2 192.0.2.2/28
-}
-
-mirror_gre_topo_h2_destroy()
-{
-	simple_if_fini $h2 192.0.2.2/28
-}
+source mirror_topo_lib.sh
 
 mirror_gre_topo_h3_create()
 {
-	simple_if_init $h3
+	mirror_topo_h3_create
 
 	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
 	ip link set h3-gt4 vrf v$h3
@@ -71,49 +53,32 @@ mirror_gre_topo_h3_destroy()
 	tunnel_destroy h3-gt6
 	tunnel_destroy h3-gt4
 
-	simple_if_fini $h3
+	mirror_topo_h3_destroy
 }
 
 mirror_gre_topo_switch_create()
 {
-	ip link set dev $swp3 up
-
-	ip link add name br1 type bridge vlan_filtering 1
-	ip link set dev br1 up
-
-	ip link set dev $swp1 master br1
-	ip link set dev $swp1 up
-
-	ip link set dev $swp2 master br1
-	ip link set dev $swp2 up
+	mirror_topo_switch_create
 
 	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
 		      ttl 100 tos inherit
 
 	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
 		      ttl 100 tos inherit allow-localremote
-
-	tc qdisc add dev $swp1 clsact
 }
 
 mirror_gre_topo_switch_destroy()
 {
-	tc qdisc del dev $swp1 clsact
-
 	tunnel_destroy gt6
 	tunnel_destroy gt4
 
-	ip link set dev $swp1 down
-	ip link set dev $swp2 down
-	ip link del dev br1
-
-	ip link set dev $swp3 down
+	mirror_topo_switch_destroy
 }
 
 mirror_gre_topo_create()
 {
-	mirror_gre_topo_h1_create
-	mirror_gre_topo_h2_create
+	mirror_topo_h1_create
+	mirror_topo_h2_create
 	mirror_gre_topo_h3_create
 
 	mirror_gre_topo_switch_create
@@ -124,6 +89,6 @@ mirror_gre_topo_destroy()
 	mirror_gre_topo_switch_destroy
 
 	mirror_gre_topo_h3_destroy
-	mirror_gre_topo_h2_destroy
-	mirror_gre_topo_h1_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
 }
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 0000000..5b787972
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically add more devices to the topology.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                                                             |
+#   +-----|-------------------------------------------------------------------+
+#         |
+#   +-----|-------------------------------------------------------------------+
+#   | H3  + $h3                                                               |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+	simple_if_init $h3
+}
+
+mirror_topo_h3_destroy()
+{
+	simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+	ip link set dev $swp3 up
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+
+	ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_topo_h3_create
+
+	mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+	mirror_topo_switch_destroy
+
+	mirror_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 02/10] selftests: forwarding: mirror_gre_lib: Extract generic functions
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

For non-GRE mirroring tests, functions along the lines of
do_test_span_gre_dir_ips() and test_span_gre_dir_ips() are necessary,
but such that they don't assume tunnels are involved. Extract the code
from mirror_gre_lib.sh to mirror_lib.sh and convert to just use a given
device without assuming it's named "h3-$tundev". Convert the two
above-mentioned functions to wrappers that pass along the correct device
name.

Add test_span_dir() and fail_test_span_dir() to round out the API for use
by following patches.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../selftests/net/forwarding/mirror_gre_lib.sh     | 41 ++++------------
 .../testing/selftests/net/forwarding/mirror_lib.sh | 54 ++++++++++++++++++++++
 2 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index 207ffd1..c7b2cdc 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,47 +1,26 @@
 # SPDX-License-Identifier: GPL-2.0
 
-do_test_span_gre_dir_ips()
-{
-	local expect=$1; shift
-	local tundev=$1; shift
-	local direction=$1; shift
-	local ip1=$1; shift
-	local ip2=$1; shift
-
-	icmp_capture_install h3-$tundev
-	mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 $expect
-	mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 $expect
-	icmp_capture_uninstall h3-$tundev
-}
+source mirror_lib.sh
 
 quick_test_span_gre_dir_ips()
 {
-	do_test_span_gre_dir_ips 10 "$@"
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 10 h3-$tundev "$@"
 }
 
 fail_test_span_gre_dir_ips()
 {
-	do_test_span_gre_dir_ips 0 "$@"
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 0 h3-$tundev "$@"
 }
 
 test_span_gre_dir_ips()
 {
 	local tundev=$1; shift
-	local direction=$1; shift
-	local forward_type=$1; shift
-	local backward_type=$1; shift
-	local ip1=$1; shift
-	local ip2=$1; shift
-
-	quick_test_span_gre_dir_ips "$tundev" "$direction" "$ip1" "$ip2"
-
-	icmp_capture_install h3-$tundev "type $forward_type"
-	mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 10
-	icmp_capture_uninstall h3-$tundev
 
-	icmp_capture_install h3-$tundev "type $backward_type"
-	mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 10
-	icmp_capture_uninstall h3-$tundev
+	test_span_dir_ips h3-$tundev "$@"
 }
 
 full_test_span_gre_dir_ips()
@@ -57,8 +36,8 @@ full_test_span_gre_dir_ips()
 	RET=0
 
 	mirror_install $swp1 $direction $tundev "matchall $tcflags"
-	test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \
-			      "$backward_type" "$ip1" "$ip2"
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
 	mirror_uninstall $swp1 $direction
 
 	log_test "$direction $what ($tcflags)"
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index e5028a5..04cbc38 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -38,3 +38,57 @@ mirror_test()
 	((expect <= delta && delta <= expect + 2))
 	check_err $? "Expected to capture $expect packets, got $delta."
 }
+
+do_test_span_dir_ips()
+{
+	local expect=$1; shift
+	local dev=$1; shift
+	local direction=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	icmp_capture_install $dev
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+	icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+	do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+	do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+	icmp_capture_install $dev "type $forward_type"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 10
+	icmp_capture_uninstall $dev
+
+	icmp_capture_install $dev "type $backward_type"
+	mirror_test v$h2 $ip2 $ip1 $dev 100 10
+	icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+	fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+	test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 03/10] selftests: forwarding: Add $h3's clsact to mirror_topo_lib.sh
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Having a clsact qdisc on $h3 is useful in several tests, and will be
useful in more tests to come. Move the registration from all the tests
that need it into the topology file itself.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 tools/testing/selftests/net/forwarding/mirror_gre.sh         | 2 --
 tools/testing/selftests/net/forwarding/mirror_gre_changes.sh | 2 --
 tools/testing/selftests/net/forwarding/mirror_topo_lib.sh    | 2 ++
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
index c6786d1..e6fd7a1 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -72,7 +72,6 @@ test_span_gre_mac()
 	RET=0
 
 	mirror_install $swp1 $direction $tundev "matchall $tcflags"
-	tc qdisc add dev $h3 clsact
 	tc filter add dev $h3 ingress pref 77 prot $prot \
 		flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
 		action pass
@@ -80,7 +79,6 @@ test_span_gre_mac()
 	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
 
 	tc filter del dev $h3 ingress pref 77
-	tc qdisc del dev $h3 clsact
 	mirror_uninstall $swp1 $direction
 
 	log_test "$direction $what: envelope MAC ($tcflags)"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index e22a9e4..aa29d46 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -73,7 +73,6 @@ test_span_gre_ttl()
 	RET=0
 
 	mirror_install $swp1 ingress $tundev "matchall $tcflags"
-	tc qdisc add dev $h3 clsact
 	tc filter add dev $h3 ingress pref 77 prot $prot \
 		flower ip_ttl 50 action pass
 
@@ -84,7 +83,6 @@ test_span_gre_ttl()
 
 	ip link set dev $tundev type $type ttl 100
 	tc filter del dev $h3 ingress pref 77
-	tc qdisc del dev $h3 clsact
 	mirror_uninstall $swp1 ingress
 
 	log_test "$what: TTL change ($tcflags)"
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
index 5b787972..04979e5 100644
--- a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -46,10 +46,12 @@ mirror_topo_h2_destroy()
 mirror_topo_h3_create()
 {
 	simple_if_init $h3
+	tc qdisc add dev $h3 clsact
 }
 
 mirror_topo_h3_destroy()
 {
+	tc qdisc del dev $h3 clsact
 	simple_if_fini $h3
 }
 
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 04/10] selftests: forwarding: lib: Support VLAN devices
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Add vlan_create() and vlan_destroy() to manage VLAN netdevices.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 tools/testing/selftests/net/forwarding/lib.sh | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index d5aa864..11c481c 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -362,6 +362,31 @@ tunnel_destroy()
 	ip link del dev $name
 }
 
+vlan_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf=$1; shift
+	local ips=("${@}")
+	local name=$if_name.$vid
+
+	ip link add name $name link $if_name type vlan id $vid
+	if [ "$vrf" != "" ]; then
+		ip link set dev $name master $vrf
+	fi
+	ip link set dev $name up
+	__addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local name=$if_name.$vid
+
+	ip link del dev $name
+}
+
 master_name_get()
 {
 	local if_name=$1
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 05/10] selftests: forwarding: mirror_gre_lib: Support VLAN
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Add full_test_span_gre_dir_vlan_ips() and full_test_span_gre_dir_vlan()
to support mirror-to-gre tests that involve VLAN.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../selftests/net/forwarding/mirror_gre_lib.sh     | 34 ++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index c7b2cdc..92ef6dd 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -43,6 +43,35 @@ full_test_span_gre_dir_ips()
 	log_test "$direction $what ($tcflags)"
 }
 
+full_test_span_gre_dir_vlan_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local vlan_match=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+
+	tc filter add dev $h3 ingress pref 77 prot 802.1q \
+		flower $vlan_match ip_proto 0x2f \
+		action pass
+	mirror_test v$h1 $ip1 $ip2 $h3 77 10
+	tc filter del dev $h3 ingress pref 77
+
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
 quick_test_span_gre_dir()
 {
 	quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
@@ -62,3 +91,8 @@ full_test_span_gre_dir()
 {
 	full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
 }
+
+full_test_span_gre_dir_vlan()
+{
+	full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 06/10] selftests: forwarding: lib: Extract trap_{,un}install()
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

A mirror-to-vlan test that's coming next needs to install the trap
unconditionally. Therefore extract from slow_path_trap_{,un}install()
more generic functions trap_install() and trap_uninstall(), and convert
the former two to conditional wrappers around these.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 tools/testing/selftests/net/forwarding/lib.sh | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 11c481c..e78ee7e 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -472,26 +472,35 @@ tc_offload_check()
 	return 0
 }
 
-slow_path_trap_install()
+trap_install()
 {
 	local dev=$1; shift
 	local direction=$1; shift
 
-	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
-		# For slow-path testing, we need to install a trap to get to
-		# slow path the packets that would otherwise be switched in HW.
-		tc filter add dev $dev $direction pref 1 \
-		   flower skip_sw action trap
-	fi
+	# For slow-path testing, we need to install a trap to get to
+	# slow path the packets that would otherwise be switched in HW.
+	tc filter add dev $dev $direction pref 1 flower skip_sw action trap
 }
 
-slow_path_trap_uninstall()
+trap_uninstall()
 {
 	local dev=$1; shift
 	local direction=$1; shift
 
+	tc filter del dev $dev $direction pref 1 flower skip_sw
+}
+
+slow_path_trap_install()
+{
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		trap_install "$@"
+	fi
+}
+
+slow_path_trap_uninstall()
+{
 	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
-		tc filter del dev $dev $direction pref 1 flower skip_sw
+		trap_uninstall "$@"
 	fi
 }
 
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 07/10] selftests: forwarding: Test mirror-to-vlan
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Test for "tc action mirred egress mirror" that mirrors to a vlan device.
- test_vlan() tests that the packets get mirrored
- test_tagged_vlan() tests that the mirrored packets have the correct inner
  VLAN tag.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../selftests/net/forwarding/mirror_vlan.sh        | 169 +++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/mirror_vlan.sh

diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 0000000..1e10520
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+	test_vlan
+	test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_topo_create
+
+	vlan_create $swp3 555
+
+	vlan_create $h3 555 v$h3
+	matchall_sink_create $h3.555
+
+	vlan_create $h1 111 v$h1 192.0.2.17/28
+	bridge vlan add dev $swp1 vid 111
+
+	vlan_create $h2 111 v$h2 192.0.2.18/28
+	bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h2 111
+	vlan_destroy $h1 111
+	vlan_destroy $h3 555
+	vlan_destroy $swp3 555
+
+	mirror_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_dir()
+{
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+	test_vlan_dir ingress 8 0
+	test_vlan_dir egress 0 8
+}
+
+vlan_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local dev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$dev" ingress \
+	   proto 802.1q pref $pref \
+	   flower $filter \
+	   action pass
+}
+
+vlan_capture_install()
+{
+	vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+	vlan_capture_add_del del 100 "$@"
+}
+
+do_test_span_vlan_dir_ips()
+{
+	local expect=$1; shift
+	local dev=$1; shift
+	local vid=$1; shift
+	local direction=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	vlan_capture_install $dev "vlan_id $vid"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+	vlan_capture_uninstall $dev
+}
+
+test_tagged_vlan_dir()
+{
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+				  192.0.2.17 192.0.2.18	# expect packets tagged with VID 111
+	do_test_span_vlan_dir_ips  0 "$h3.555" 555 "$direction" \
+				  192.0.2.17 192.0.2.18	# expect none tagged with VID 555
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror tagged to vlan ($tcflags)"	# distinct from test_vlan_dir's message
+}
+
+test_tagged_vlan()
+{
+	test_tagged_vlan_dir ingress 8 0
+	test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+	trap_install $h3 ingress		# installed unconditionally, regardless of $tcflags
+
+	tests_run
+
+	trap_uninstall $h3 ingress		# undo the trap_install above before the next run
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 08/10] selftests: forwarding: Test mirror-to-gre w/ UL VLAN+802.1q
From: Petr Machata @ 2018-05-24 14:27 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Test for "tc action mirred egress mirror" that mirrors to GRE when the
underlay route points at a vlan device on top of a bridge device with
vlan filtering (802.1q).

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../net/forwarding/mirror_gre_vlan_bridge_1q.sh    | 140 +++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 0000000..01ec28a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_forbidden
+	test_ip6gretap_forbidden
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+	bridge vlan add dev br1 vid 555 self
+	ip route rep 192.0.2.130/32 dev br1.555
+	ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+	ip link set dev $swp3 master br1
+	bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp3 nomaster
+	vlan_destroy $h3 555
+	vlan_destroy br1 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	# Run the pass-test first, to prime neighbor table.
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	# Now forbid the VLAN at the bridge and see it fail.
+	bridge vlan del dev br1 vid 555 self
+	sleep 1
+
+	fail_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	bridge vlan add dev br1 vid 555 self
+	sleep 1
+
+	log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden()
+{
+	test_span_gre_forbidden gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden()
+{
+	test_span_gre_forbidden gt6 "mirror to ip6gretap"	# gt6 is the ip6gretap tunnel; gt4 is gretap
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 09/10] selftests: forwarding: Test mirror-to-gre w/ UL VLAN
From: Petr Machata @ 2018-05-24 14:28 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Test for "tc action mirred egress mirror" that mirrors to a gretap
netdevice whose underlay route points at a vlan device.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../selftests/net/forwarding/mirror_gre_vlan.sh    | 92 ++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 0000000..88cecdb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+	test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name $swp3.555 link $swp3 type vlan id 555
+	ip address add dev $swp3.555 192.0.2.129/32
+	ip address add dev $swp3.555 2001:db8:2::1/128
+	ip link set dev $swp3.555 up
+
+	ip route add 192.0.2.130/32 dev $swp3.555
+	ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+	ip link add name $h3.555 link $h3 type vlan id 555
+	ip link set dev $h3.555 master v$h3
+	ip address add dev $h3.555 192.0.2.130/28
+	ip address add dev $h3.555 2001:db8:2::2/64
+	ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link del dev $h3.555
+	ip link del dev $swp3.555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
-- 
2.4.11

^ permalink raw reply related

* [PATCH net-next 10/10] selftests: forwarding: Test mirror-to-gre w/ UL 802.1d+VLAN
From: Petr Machata @ 2018-05-24 14:28 UTC (permalink / raw)
  To: netdev, linux-kselftest; +Cc: davem, shuah, idosch, jiri
In-Reply-To: <cover.1527171860.git.petrm@mellanox.com>

Test for "tc action mirred egress mirror" that mirrors to GRE when the
underlay route points at an 802.1d bridge and packet egresses through a
VLAN device.

Besides testing basic connectivity, this also tests that the traffic is
properly tagged.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---
 .../net/forwarding/mirror_gre_bridge_1d_vlan.sh    | 109 +++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 0000000..3d47afc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	vlan_create $swp3 555
+
+	ip link set dev $swp3.555 master br2
+	ip route add 192.0.2.130/32 dev br2
+	ip -6 route add 2001:db8:2::2/128 dev br2
+
+	ip address add dev br2 192.0.2.129/32
+	ip address add dev br2 2001:db8:2::1/128
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h3 555
+	ip link del dev br2
+	vlan_destroy $swp3 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
-- 
2.4.11

^ permalink raw reply related

* linux-next: manual merge of the net-next tree with the net tree
From: Mark Brown @ 2018-05-24 14:35 UTC (permalink / raw)
  To: David Miller, Networking, Roopa Prabhu
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 1307 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  net/ipv4/fib_frontend.c

between commit:

  2eabd764cb5512f1338 ("net: ipv4: add missing RTA_TABLE to rtm_ipv4_policy")

from the net tree and commit:

  404eb77ea766260c45c ("ipv4: support sport, dport and ip_proto in RTM_GETROUTE")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

diff --cc net/ipv4/fib_frontend.c
index e66172aaf241,897ae92dff0f..000000000000
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@@ -649,7 -649,9 +649,10 @@@ const struct nla_policy rtm_ipv4_policy
  	[RTA_ENCAP]		= { .type = NLA_NESTED },
  	[RTA_UID]		= { .type = NLA_U32 },
  	[RTA_MARK]		= { .type = NLA_U32 },
 +	[RTA_TABLE]		= { .type = NLA_U32 },
+ 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
+ 	[RTA_SPORT]		= { .type = NLA_U16 },
+ 	[RTA_DPORT]		= { .type = NLA_U16 },
  };
  
  static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH 2/4] arcnet: com20020: bindings for smsc com20020
From: Rob Herring @ 2018-05-24 14:36 UTC (permalink / raw)
  To: Andrea Greco
  Cc: Tobin C. Harding, Andrea Greco, Mark Rutland, netdev, devicetree,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAPoXtQJYmfk+-4qOpbEetUAdVwTgVPmk=mQaFv7yLF2V2dO=4A@mail.gmail.com>

On Wed, May 23, 2018 at 5:17 PM, Andrea Greco
<andrea.greco.gapmilano@gmail.com> wrote:
> On 05/23/2018 06:49 PM, Rob Herring wrote:
>> One typo, otherwise:
>>
>> Reviewed-by: Rob Herring <robh@kernel.org>
>
> Yes typo, Fixed over my branch, sorry for that...
> I expect a comment about bps, Bit per Second, used in `bus-speed-bps`
> You will add it by your self in property-units.txt, or required my patch?

If you want to add it, that's fine. But it's really not something that
comes up often. For UARTs, there's already the "current-speed"
property and most other things I can think of use Hz to express
speeds.

Rob

^ permalink raw reply

* Re: [net-next 1/6] net/dcb: Add dcbnl buffer attribute
From: Huy Nguyen @ 2018-05-24 14:37 UTC (permalink / raw)
  To: John Fastabend, Jiri Pirko, Jakub Kicinski
  Cc: Saeed Mahameed, David S. Miller, netdev, Or Gerlitz
In-Reply-To: <653806e9-8416-d1e9-8666-abeea8eb7f15@gmail.com>

On 5/23/2018 11:03 AM, John Fastabend wrote:
> On 05/23/2018 08:37 AM, Huy Nguyen wrote:
>>
>> On 5/23/2018 8:52 AM, John Fastabend wrote:
>>> It would be nice though if the API gave us some hint on max/min/stride
>>> of allowed values. Could the get API return these along with current
>>> value? Presumably the allowed max size could change with devlink buffer
>>> changes in how the global buffer is divided up as well.
>> Acked. I will add Max. Let's skip min/stride since it is too hardware specific.
> At minimum then we need to document for driver writers what to do
> with a value that falls between strides. Round-up or round-down.
>
> .John
V2 still under internal review. But here are the changes in patch #1 and 
patch #6.
patch #1
Changes in V2:
     Add total_size in dcbnl_buffer to report the total available buffer 
size of the netdev.
     Code changes are in patch #1 and #6.

patch #6 commit message
Changes in V2:
     Report total available buffer size of the netdev.
     Comment on buffer stride:
     Mellanox HCA buffer stride is 128 Bytes. If the
     buffer size is not multiple of 128, the buffer size will be rounded 
down
     to the nearest multiple of 128.

^ permalink raw reply

* [bpf-next V5 PATCH 0/8] xdp: introduce bulking for ndo_xdp_xmit API
From: Jesper Dangaard Brouer @ 2018-05-24 14:45 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki

This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames.

When the kernel is compiled with CONFIG_RETPOLINE, every indirect function
pointer (branch) call hurts performance. For XDP this has a huge
negative performance impact.

This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but
also prepares for further optimizations.  The DMA API's use of indirect
function pointer calls is the primary source of the regression.  It is
left for a followup patchset to use bulking calls towards the DMA API
(via the scatter-gather calls).

The other advantage of this API change is that drivers can more easily
amortize the cost of any sync/locking scheme over the bulk of
packets.  The assumption of the current API is that the driver
implementing the NDO will also allocate a dedicated XDP TX queue for
every CPU in the system, which is not always possible or practical to
configure. E.g. ixgbe cannot load an XDP program on a machine with
more than 96 CPUs, due to limited hardware TX queues.  E.g. virtio_net
is hard to configure as it requires manually increasing the
queues. E.g. the tun driver chooses to use a per XDP frame producer lock
modulo smp_processor_id over avail queues.

I considered adding 'flags' to ndo_xdp_xmit, but it's not part of
this patchset.  This will be a followup patchset, once we know if this
will be needed (e.g. for non-map xdp_redirect flush-flag, and if
AF_XDP chooses to use ndo_xdp_xmit for TX).

---
V5: Fixed up issues spotted by Daniel and John

V4: Split out the patches from 4 to 8 patches.  I cannot split the
driver changes from the NDO change, but I've tried to isolate the NDO
change together with the driver change as much as possible.


Jesper Dangaard Brouer (8):
      bpf: devmap introduce dev_map_enqueue
      bpf: devmap prepare xdp frames for bulking
      xdp: add tracepoint for devmap like cpumap have
      samples/bpf: xdp_monitor use tracepoint xdp:xdp_devmap_xmit
      xdp: introduce xdp_return_frame_rx_napi
      xdp: change ndo_xdp_xmit API to support bulking
      xdp/trace: extend tracepoint in devmap with an err
      samples/bpf: xdp_monitor use err code from tracepoint xdp:xdp_devmap_xmit


 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |   26 ++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |    2 
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   21 +++-
 drivers/net/tun.c                             |   37 +++++--
 drivers/net/virtio_net.c                      |   66 +++++++++----
 include/linux/bpf.h                           |   18 +++
 include/linux/netdevice.h                     |   14 ++-
 include/net/page_pool.h                       |    5 +
 include/net/xdp.h                             |    1 
 include/trace/events/xdp.h                    |   50 +++++++++-
 kernel/bpf/cpumap.c                           |    2 
 kernel/bpf/devmap.c                           |  131 +++++++++++++++++++++++--
 net/core/filter.c                             |   23 +---
 net/core/xdp.c                                |   20 +++-
 samples/bpf/xdp_monitor_kern.c                |   49 +++++++++
 samples/bpf/xdp_monitor_user.c                |   69 +++++++++++++
 16 files changed, 448 insertions(+), 86 deletions(-)

^ permalink raw reply

* [bpf-next V5 PATCH 1/8] bpf: devmap introduce dev_map_enqueue
From: Jesper Dangaard Brouer @ 2018-05-24 14:45 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

Functionality is the same, but the ndo_xdp_xmit call is now
simply invoked from inside the devmap.c code.

V2: Fix compile issue reported by kbuild test robot <lkp@intel.com>

V5: Cleanups requested by Daniel
 - Newlines before func definition
 - Use BUILD_BUG_ON checks
 - Remove unnecessary store of the return value in dev_map_enqueue

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/linux/bpf.h        |   16 +++++++++++++---
 include/trace/events/xdp.h |    9 ++++++++-
 kernel/bpf/devmap.c        |   34 ++++++++++++++++++++++++++++------
 net/core/filter.c          |   15 ++-------------
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1795eeee846c..23a809da452d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -487,14 +487,16 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 
 /* Map specifics */
-struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct xdp_buff;
+
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
 void __cpu_map_flush(struct bpf_map *map);
-struct xdp_buff;
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 
@@ -573,6 +575,15 @@ static inline void __dev_map_flush(struct bpf_map *map)
 {
 }
 
+struct xdp_buff;
+struct bpf_dtab_netdev;
+
+static inline
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
+{
+	return 0;
+}
+
 static inline
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -587,7 +598,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
 {
 }
 
-struct xdp_buff;
 static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 				  struct xdp_buff *xdp,
 				  struct net_device *dev_rx)
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 8989a92c571a..96104610d40e 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
 		  __entry->map_id, __entry->map_index)
 );
 
+#ifndef __DEVMAP_OBJ_TYPE
+#define __DEVMAP_OBJ_TYPE
+struct _bpf_dtab_netdev {
+	struct net_device *dev;
+};
+#endif /* __DEVMAP_OBJ_TYPE */
+
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
 	 (!map ? 0 :						\
 	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct net_device *)fwd)->ifindex : 0)))
+	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
 
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
 	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 565f9ece9115..06c400e7e4ff 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -48,13 +48,15 @@
  * calls will fail at this point.
  */
 #include <linux/bpf.h>
+#include <net/xdp.h>
 #include <linux/filter.h>
+#include <trace/events/xdp.h>
 
 #define DEV_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
 struct bpf_dtab_netdev {
-	struct net_device *dev;
+	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct bpf_dtab *dtab;
 	unsigned int bit;
 	struct rcu_head rcu;
@@ -240,21 +242,38 @@ void __dev_map_flush(struct bpf_map *map)
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
  */
-struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct bpf_dtab_netdev *dev;
+	struct bpf_dtab_netdev *obj;
 
 	if (key >= map->max_entries)
 		return NULL;
 
-	dev = READ_ONCE(dtab->netdev_map[key]);
-	return dev ? dev->dev : NULL;
+	obj = READ_ONCE(dtab->netdev_map[key]);
+	return obj;
+}
+
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
+{
+	struct net_device *dev = dst->dev;
+	struct xdp_frame *xdpf;
+
+	if (!dev->netdev_ops->ndo_xdp_xmit)
+		return -EOPNOTSUPP;
+
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
+		return -EOVERFLOW;
+
+	/* TODO: implement a bulking/enqueue step later */
+	return dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
 }
 
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
+	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
+	struct net_device *dev = dev = obj ? obj->dev : NULL;
 
 	return dev ? &dev->ifindex : NULL;
 }
@@ -405,6 +424,9 @@ static struct notifier_block dev_map_notifier = {
 
 static int __init dev_map_init(void)
 {
+	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
+	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
+		     offsetof(struct _bpf_dtab_netdev, dev));
 	register_netdevice_notifier(&dev_map_notifier);
 	return 0;
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index aa114c4acb25..c867106d3707 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3065,20 +3065,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 
 	switch (map->map_type) {
 	case BPF_MAP_TYPE_DEVMAP: {
-		struct net_device *dev = fwd;
-		struct xdp_frame *xdpf;
+		struct bpf_dtab_netdev *dst = fwd;
 
-		if (!dev->netdev_ops->ndo_xdp_xmit)
-			return -EOPNOTSUPP;
-
-		xdpf = convert_to_xdp_frame(xdp);
-		if (unlikely(!xdpf))
-			return -EOVERFLOW;
-
-		/* TODO: move to inside map code instead, for bulk support
-		 * err = dev_map_enqueue(dev, xdp);
-		 */
-		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+		err = dev_map_enqueue(dst, xdp);
 		if (err)
 			return err;
 		__dev_map_insert_ctx(map, index);

^ permalink raw reply related

* [bpf-next V5 PATCH 2/8] bpf: devmap prepare xdp frames for bulking
From: Jesper Dangaard Brouer @ 2018-05-24 14:45 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

Like cpumap, create a queue for xdp frames that will be bulked.  For now,
this patch simply invokes ndo_xdp_xmit for each frame.  This happens
either when the map flush operation is invoked, or when the limit
DEV_MAP_BULK_SIZE is reached.

V5: Avoid memleak on error path in dev_map_update_elem()

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 kernel/bpf/devmap.c |   74 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 70 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 06c400e7e4ff..15293b9dfb77 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -55,10 +55,17 @@
 #define DEV_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+#define DEV_MAP_BULK_SIZE 16
+struct xdp_bulk_queue {
+	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+	unsigned int count;
+};
+
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct bpf_dtab *dtab;
 	unsigned int bit;
+	struct xdp_bulk_queue __percpu *bulkq;
 	struct rcu_head rcu;
 };
 
@@ -208,6 +215,34 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 	__set_bit(bit, bitmap);
 }
 
+static int bq_xmit_all(struct bpf_dtab_netdev *obj,
+			 struct xdp_bulk_queue *bq)
+{
+	struct net_device *dev = obj->dev;
+	int i;
+
+	if (unlikely(!bq->count))
+		return 0;
+
+	for (i = 0; i < bq->count; i++) {
+		struct xdp_frame *xdpf = bq->q[i];
+
+		prefetch(xdpf);
+	}
+
+	for (i = 0; i < bq->count; i++) {
+		struct xdp_frame *xdpf = bq->q[i];
+		int err;
+
+		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+		if (err)
+			xdp_return_frame(xdpf);
+	}
+	bq->count = 0;
+
+	return 0;
+}
+
 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
  * from the driver before returning from its napi->poll() routine. The poll()
  * routine is called either from busy_poll context or net_rx_action signaled
@@ -223,6 +258,7 @@ void __dev_map_flush(struct bpf_map *map)
 
 	for_each_set_bit(bit, bitmap, map->max_entries) {
 		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+		struct xdp_bulk_queue *bq;
 		struct net_device *netdev;
 
 		/* This is possible if the dev entry is removed by user space
@@ -232,6 +268,9 @@ void __dev_map_flush(struct bpf_map *map)
 			continue;
 
 		__clear_bit(bit, bitmap);
+
+		bq = this_cpu_ptr(dev->bulkq);
+		bq_xmit_all(dev, bq);
 		netdev = dev->dev;
 		if (likely(netdev->netdev_ops->ndo_xdp_flush))
 			netdev->netdev_ops->ndo_xdp_flush(netdev);
@@ -254,6 +293,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 	return obj;
 }
 
+/* Runs under RCU-read-side, plus in softirq under NAPI protection.
+ * Thus, safe percpu variable access.
+ */
+static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
+{
+	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+
+	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
+		bq_xmit_all(obj, bq);
+
+	bq->q[bq->count++] = xdpf;
+	return 0;
+}
+
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
 {
 	struct net_device *dev = dst->dev;
@@ -266,8 +319,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	/* TODO: implement a bulking/enqueue step later */
-	return dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+	return bq_enqueue(dst, xdpf);
 }
 
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
@@ -282,13 +334,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
 	if (dev->dev->netdev_ops->ndo_xdp_flush) {
 		struct net_device *fl = dev->dev;
+		struct xdp_bulk_queue *bq;
 		unsigned long *bitmap;
+
 		int cpu;
 
 		for_each_online_cpu(cpu) {
 			bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
 			__clear_bit(dev->bit, bitmap);
 
+			bq = per_cpu_ptr(dev->bulkq, cpu);
+			bq_xmit_all(dev, bq);
+
 			fl->netdev_ops->ndo_xdp_flush(dev->dev);
 		}
 	}
@@ -300,6 +357,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 
 	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
 	dev_map_flush_old(dev);
+	free_percpu(dev->bulkq);
 	dev_put(dev->dev);
 	kfree(dev);
 }
@@ -332,6 +390,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct net *net = current->nsproxy->net_ns;
+	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
 	struct bpf_dtab_netdev *dev, *old_dev;
 	u32 i = *(u32 *)key;
 	u32 ifindex = *(u32 *)value;
@@ -346,13 +405,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (!ifindex) {
 		dev = NULL;
 	} else {
-		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-				   map->numa_node);
+		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
 		if (!dev)
 			return -ENOMEM;
 
+		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+						sizeof(void *), gfp);
+		if (!dev->bulkq) {
+			kfree(dev);
+			return -ENOMEM;
+		}
+
 		dev->dev = dev_get_by_index(net, ifindex);
 		if (!dev->dev) {
+			free_percpu(dev->bulkq);
 			kfree(dev);
 			return -EINVAL;
 		}

^ permalink raw reply related

* [bpf-next V5 PATCH 3/8] xdp: add tracepoint for devmap like cpumap have
From: Jesper Dangaard Brouer @ 2018-05-24 14:45 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

Notice how this allows us to get XDP statistics without affecting the XDP
performance, as the tracepoint is no longer activated on a per-packet basis.

V5: Spotted by John Fastabend.
 Fix that 'sent' also counted 'drops' in this patch; a later patch corrected
 this, but it was a mistake in this intermediate step.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/linux/bpf.h        |    6 ++++--
 include/trace/events/xdp.h |   39 +++++++++++++++++++++++++++++++++++++++
 kernel/bpf/devmap.c        |   27 +++++++++++++++++++++++----
 net/core/filter.c          |    2 +-
 4 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 23a809da452d..bbe297436e5d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -492,7 +492,8 @@ struct xdp_buff;
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp);
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+		    struct net_device *dev_rx);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
@@ -579,7 +580,8 @@ struct xdp_buff;
 struct bpf_dtab_netdev;
 
 static inline
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+		    struct net_device *dev_rx)
 {
 	return 0;
 }
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 96104610d40e..2e9ef0650144 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -229,6 +229,45 @@ TRACE_EVENT(xdp_cpumap_enqueue,
 		  __entry->to_cpu)
 );
 
+TRACE_EVENT(xdp_devmap_xmit,
+
+	TP_PROTO(const struct bpf_map *map, u32 map_index,
+		 int sent, int drops,
+		 const struct net_device *from_dev,
+		 const struct net_device *to_dev),
+
+	TP_ARGS(map, map_index, sent, drops, from_dev, to_dev),
+
+	TP_STRUCT__entry(
+		__field(int, map_id)
+		__field(u32, act)
+		__field(u32, map_index)
+		__field(int, drops)
+		__field(int, sent)
+		__field(int, from_ifindex)
+		__field(int, to_ifindex)
+	),
+
+	TP_fast_assign(
+		__entry->map_id		= map->id;
+		__entry->act		= XDP_REDIRECT;
+		__entry->map_index	= map_index;
+		__entry->drops		= drops;
+		__entry->sent		= sent;
+		__entry->from_ifindex	= from_dev->ifindex;
+		__entry->to_ifindex	= to_dev->ifindex;
+	),
+
+	TP_printk("ndo_xdp_xmit"
+		  " map_id=%d map_index=%d action=%s"
+		  " sent=%d drops=%d"
+		  " from_ifindex=%d to_ifindex=%d",
+		  __entry->map_id, __entry->map_index,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->sent, __entry->drops,
+		  __entry->from_ifindex, __entry->to_ifindex)
+);
+
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 15293b9dfb77..ff2f3bf59f2f 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -58,6 +58,7 @@
 #define DEV_MAP_BULK_SIZE 16
 struct xdp_bulk_queue {
 	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+	struct net_device *dev_rx;
 	unsigned int count;
 };
 
@@ -219,6 +220,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 			 struct xdp_bulk_queue *bq)
 {
 	struct net_device *dev = obj->dev;
+	int sent = 0, drops = 0;
 	int i;
 
 	if (unlikely(!bq->count))
@@ -235,11 +237,18 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 		int err;
 
 		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
-		if (err)
+		if (err) {
+			drops++;
 			xdp_return_frame(xdpf);
+		} else {
+			sent++;
+		}
 	}
 	bq->count = 0;
 
+	trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
+			      sent, drops, bq->dev_rx, dev);
+	bq->dev_rx = NULL;
 	return 0;
 }
 
@@ -296,18 +305,28 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
+static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
+		      struct net_device *dev_rx)
+
 {
 	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
 		bq_xmit_all(obj, bq);
 
+	/* Ingress dev_rx will be the same for all xdp_frame's in
+	 * bulk_queue, because bq stored per-CPU and must be flushed
+	 * from net_device drivers NAPI func end.
+	 */
+	if (!bq->dev_rx)
+		bq->dev_rx = dev_rx;
+
 	bq->q[bq->count++] = xdpf;
 	return 0;
 }
 
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+		    struct net_device *dev_rx)
 {
 	struct net_device *dev = dst->dev;
 	struct xdp_frame *xdpf;
@@ -319,7 +338,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	return bq_enqueue(dst, xdpf);
+	return bq_enqueue(dst, xdpf, dev_rx);
 }
 
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
diff --git a/net/core/filter.c b/net/core/filter.c
index c867106d3707..36cf2f87d742 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3067,7 +3067,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 	case BPF_MAP_TYPE_DEVMAP: {
 		struct bpf_dtab_netdev *dst = fwd;
 
-		err = dev_map_enqueue(dst, xdp);
+		err = dev_map_enqueue(dst, xdp, dev_rx);
 		if (err)
 			return err;
 		__dev_map_insert_ctx(map, index);

^ permalink raw reply related

* [bpf-next V5 PATCH 4/8] samples/bpf: xdp_monitor use tracepoint xdp:xdp_devmap_xmit
From: Jesper Dangaard Brouer @ 2018-05-24 14:46 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, Björn Töpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

The xdp_monitor sample/tool is updated to use the new tracepoint
xdp:xdp_devmap_xmit that the previous patch just introduced.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 samples/bpf/xdp_monitor_kern.c |   39 +++++++++++++++++++++++++++++++++++
 samples/bpf/xdp_monitor_user.c |   44 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 211db8ded0de..2854aa0665ea 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -208,3 +208,42 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
 
 	return 0;
 }
+
+struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
+	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size	= sizeof(u32),
+	.value_size	= sizeof(struct datarec),
+	.max_entries	= 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct devmap_xmit_ctx {
+	u64 __pad;		// First 8 bytes are not accessible by bpf code
+	int map_id;		//	offset:8;  size:4; signed:1;
+	u32 act;		//	offset:12; size:4; signed:0;
+	u32 map_index;		//	offset:16; size:4; signed:0;
+	int drops;		//	offset:20; size:4; signed:1;
+	int sent;		//	offset:24; size:4; signed:1;
+	int from_ifindex;	//	offset:28; size:4; signed:1;
+	int to_ifindex;		//	offset:32; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_devmap_xmit")
+int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
+{
+	struct datarec *rec;
+	u32 key = 0;
+
+	rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->sent;
+	rec->dropped   += ctx->drops;
+
+	/* Record bulk events, then userspace can calc average bulk size */
+	rec->info += 1;
+
+	return 1;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index bf09b5188acd..7e18a454924c 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -141,6 +141,7 @@ struct stats_record {
 	struct record_u64 xdp_exception[XDP_ACTION_MAX];
 	struct record xdp_cpumap_kthread;
 	struct record xdp_cpumap_enqueue[MAX_CPUS];
+	struct record xdp_devmap_xmit;
 };
 
 static bool map_collect_record(int fd, __u32 key, struct record *rec)
@@ -397,7 +398,7 @@ static void stats_print(struct stats_record *stats_rec,
 			info = calc_info(r, p, t);
 			if (info > 0)
 				i_str = "sched";
-			if (pps > 0)
+			if (pps > 0 || drop > 0)
 				printf(fmt1, "cpumap-kthread",
 				       i, pps, drop, info, i_str);
 		}
@@ -409,6 +410,42 @@ static void stats_print(struct stats_record *stats_rec,
 		printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
 	}
 
+	/* devmap ndo_xdp_xmit stats */
+	{
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s\n";
+		struct record *rec, *prev;
+		double drop, info;
+		char *i_str = "";
+
+		rec  =  &stats_rec->xdp_devmap_xmit;
+		prev = &stats_prev->xdp_devmap_xmit;
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps  = calc_pps(r, p, t);
+			drop = calc_drop(r, p, t);
+			info = calc_info(r, p, t);
+			if (info > 0) {
+				i_str = "bulk-average";
+				info = (pps+drop) / info; /* calc avg bulk */
+			}
+			if (pps > 0 || drop > 0)
+				printf(fmt1, "devmap-xmit",
+				       i, pps, drop, info, i_str);
+		}
+		pps = calc_pps(&rec->total, &prev->total, t);
+		drop = calc_drop(&rec->total, &prev->total, t);
+		info = calc_info(&rec->total, &prev->total, t);
+		if (info > 0) {
+			i_str = "bulk-average";
+			info = (pps+drop) / info; /* calc avg bulk */
+		}
+		printf(fmt2, "devmap-xmit", "total", pps, drop, info, i_str);
+	}
+
 	printf("\n");
 }
 
@@ -437,6 +474,9 @@ static bool stats_collect(struct stats_record *rec)
 	fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
 	map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
 
+	fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
+	map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
+
 	return true;
 }
 
@@ -480,6 +520,7 @@ static struct stats_record *alloc_stats_record(void)
 
 	rec_sz = sizeof(struct datarec);
 	rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+	rec->xdp_devmap_xmit.cpu    = alloc_rec_per_cpu(rec_sz);
 
 	for (i = 0; i < MAX_CPUS; i++)
 		rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
@@ -498,6 +539,7 @@ static void free_stats_record(struct stats_record *r)
 		free(r->xdp_exception[i].cpu);
 
 	free(r->xdp_cpumap_kthread.cpu);
+	free(r->xdp_devmap_xmit.cpu);
 
 	for (i = 0; i < MAX_CPUS; i++)
 		free(r->xdp_cpumap_enqueue[i].cpu);

^ permalink raw reply related

* [bpf-next V5 PATCH 5/8] xdp: introduce xdp_return_frame_rx_napi
From: Jesper Dangaard Brouer @ 2018-05-24 14:46 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, BjörnTöpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

When sending an xdp_frame through an xdp_do_redirect call, error
cases can happen where the xdp_frame needs to be dropped, and
returning an -errno code isn't sufficient/possible any longer
(e.g. for the cpumap case). This is already fully supported, by simply
calling xdp_return_frame.

This patch is an optimization, which provides xdp_return_frame_rx_napi,
a faster variant for these error cases.  It takes advantage of the
fact that XDP RX runs under NAPI protection.

This change is mostly relevant for drivers using the page_pool
allocator as it can take advantage of this. (Tested with mlx5).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/net/page_pool.h |    5 +++--
 include/net/xdp.h       |    1 +
 kernel/bpf/cpumap.c     |    2 +-
 kernel/bpf/devmap.c     |    2 +-
 net/core/xdp.c          |   20 ++++++++++++++++----
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c79087153148..694d055e01ef 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
 void __page_pool_put_page(struct page_pool *pool,
 			  struct page *page, bool allow_direct);
 
-static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+static inline void page_pool_put_page(struct page_pool *pool,
+				      struct page *page, bool allow_direct)
 {
 	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
 	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 	 */
 #ifdef CONFIG_PAGE_POOL
-	__page_pool_put_page(pool, page, false);
+	__page_pool_put_page(pool, page, allow_direct);
 #endif
 }
 /* Very limited use-cases allow recycle direct */
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 0b689cf561c7..7ad779237ae8 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 }
 
 void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c95b04ec103e..e0918d180f08 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame(xdpf);
+			xdp_return_frame_rx_napi(xdpf);
 		}
 		processed++;
 	}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ff2f3bf59f2f..a9cd5c93dd2b 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -239,7 +239,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame(xdpf);
+			xdp_return_frame_rx_napi(xdpf);
 		} else {
 			sent++;
 		}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bf6758f74339..cb8c4e061a5a 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -308,7 +308,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
-static void xdp_return(void *data, struct xdp_mem_info *mem)
+/* XDP RX runs under NAPI protection, and in different delivery error
+ * scenarios (e.g. queue full), it is possible to return the xdp_frame
+ * while still leveraging this protection.  The @napi_direct boolean
+ * is used for those call sites.  This allows for faster recycling
+ * of xdp_frames/pages in those cases.
+ */
+static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
 {
 	struct xdp_mem_allocator *xa;
 	struct page *page;
@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
 		if (xa)
-			page_pool_put_page(xa->page_pool, page);
+			page_pool_put_page(xa->page_pool, page, napi_direct);
 		else
 			put_page(page);
 		rcu_read_unlock();
@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
-	xdp_return(xdpf->data, &xdpf->mem);
+	__xdp_return(xdpf->data, &xdpf->mem, false);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
+{
+	__xdp_return(xdpf->data, &xdpf->mem, true);
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
+
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-	xdp_return(xdp->data, &xdp->rxq->mem);
+	__xdp_return(xdp->data, &xdp->rxq->mem, true);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);

^ permalink raw reply related

* [bpf-next V5 PATCH 6/8] xdp: change ndo_xdp_xmit API to support bulking
From: Jesper Dangaard Brouer @ 2018-05-24 14:46 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, BjörnTöpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

This patch changes the API for ndo_xdp_xmit to support bulking
xdp_frames.

When kernel is compiled with CONFIG_RETPOLINE, XDP sees a huge slowdown.
Most of the slowdown is caused by DMA API indirect function calls, but
also the net_device->ndo_xdp_xmit() call.

Benchmarked patch with CONFIG_RETPOLINE, using xdp_redirect_map with
single flow/core test (CPU E5-1650 v4 @ 3.60GHz), showed
performance improved:
 for driver ixgbe: 6,042,682 pps -> 6,853,768 pps = +811,086 pps
 for driver i40e : 6,187,169 pps -> 6,724,519 pps = +537,350 pps

With frames available as a bulk inside the driver ndo_xdp_xmit call,
further optimizations are possible, like bulk DMA-mapping for TX.

Testing without CONFIG_RETPOLINE shows the same performance for
physical NIC drivers.

The virtual NIC driver tun sees a huge performance boost, as it can
avoid doing per frame producer locking, but instead amortize the
locking cost over the bulk.

V2: Fix compile errors reported by kbuild test robot <lkp@intel.com>
V4: Isolated ndo, driver changes and callers.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |   26 +++++++---
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |    2 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   21 ++++++--
 drivers/net/tun.c                             |   37 +++++++++-----
 drivers/net/virtio_net.c                      |   66 +++++++++++++++++++------
 include/linux/netdevice.h                     |   14 +++--
 kernel/bpf/devmap.c                           |   29 +++++++----
 net/core/filter.c                             |    8 ++-
 8 files changed, 139 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 5efa68de935b..9b698c5acd05 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * @dev: netdev
  * @xdp: XDP buffer
  *
- * Returns Zero if sent, else an error code
+ * Returns number of frames successfully sent. Frames that fail are
+ * freed via the XDP return API.
+ *
+ * For error cases, a negative errno code is returned and no frames
+ * are transmitted (caller must handle freeing frames).
  **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
 	struct i40e_vsi *vsi = np->vsi;
-	int err;
+	int drops = 0;
+	int i;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
 		return -ENETDOWN;
@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;
 
-	err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
-	if (err != I40E_XDP_TX)
-		return -ENOSPC;
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
 
-	return 0;
+		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+		if (err != I40E_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index fdd2c55f03a6..eb8804b3d7b6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
 void i40e_xdp_flush(struct net_device *dev);
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6652b201df5b..9645619f7729 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int ixgbe_xdp_xmit(struct net_device *dev, int n,
+			  struct xdp_frame **frames)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
-	int err;
+	int drops = 0;
+	int i;
 
 	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
 		return -ENETDOWN;
@@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (unlikely(!ring))
 		return -ENXIO;
 
-	err = ixgbe_xmit_xdp_ring(adapter, xdpf);
-	if (err != IXGBE_XDP_TX)
-		return -ENOSPC;
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
 
-	return 0;
+		err = ixgbe_xmit_xdp_ring(adapter, xdpf);
+		if (err != IXGBE_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
 }
 
 static void ixgbe_xdp_flush(struct net_device *dev)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 44d4f3d25350..d3dcfcb1c4b3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -70,6 +70,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/sock.h>
+#include <net/xdp.h>
 #include <linux/seq_file.h>
 #include <linux/uio.h>
 #include <linux/skb_array.h>
@@ -1290,34 +1291,44 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
+static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_file *tfile;
 	u32 numqueues;
-	int ret = 0;
+	int drops = 0;
+	int cnt = n;
+	int i;
 
 	rcu_read_lock();
 
 	numqueues = READ_ONCE(tun->numqueues);
 	if (!numqueues) {
-		ret = -ENOSPC;
-		goto out;
+		rcu_read_unlock();
+		return -ENXIO; /* Caller will free/return all frames */
 	}
 
 	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
 					    numqueues]);
-	/* Encode the XDP flag into lowest bit for consumer to differ
-	 * XDP buffer from sk_buff.
-	 */
-	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
-		this_cpu_inc(tun->pcpu_stats->tx_dropped);
-		ret = -ENOSPC;
+
+	spin_lock(&tfile->tx_ring.producer_lock);
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdp = frames[i];
+		/* Encode the XDP flag into lowest bit for consumer to differ
+		 * XDP buffer from sk_buff.
+		 */
+		void *frame = tun_xdp_to_ptr(xdp);
+
+		if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
+			this_cpu_inc(tun->pcpu_stats->tx_dropped);
+			xdp_return_frame_rx_napi(xdp);
+			drops++;
+		}
 	}
+	spin_unlock(&tfile->tx_ring.producer_lock);
 
-out:
 	rcu_read_unlock();
-	return ret;
+	return cnt - drops;
 }
 
 static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
@@ -1327,7 +1338,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return tun_xdp_xmit(dev, frame);
+	return tun_xdp_xmit(dev, 1, &frame);
 }
 
 static void tun_xdp_flush(struct net_device *dev)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f34794a76c4d..39a0783d1cde 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
 	virtqueue_kick(sq->vq);
 }
 
-static int __virtnet_xdp_xmit(struct virtnet_info *vi,
-			       struct xdp_frame *xdpf)
+static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+				   struct send_queue *sq,
+				   struct xdp_frame *xdpf)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
-	struct xdp_frame *xdpf_sent;
-	struct send_queue *sq;
-	unsigned int len;
-	unsigned int qp;
 	int err;
 
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	sq = &vi->sq[qp];
-
-	/* Free up any pending old buffers before queueing new ones. */
-	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-		xdp_return_frame(xdpf_sent);
-
 	/* virtqueue want to use data area in-front of packet */
 	if (unlikely(xdpf->metasize > 0))
 		return -EOPNOTSUPP;
@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
 	return 0;
 }
 
-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
+				   struct xdp_frame *xdpf)
+{
+	struct xdp_frame *xdpf_sent;
+	struct send_queue *sq;
+	unsigned int len;
+	unsigned int qp;
+
+	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+	sq = &vi->sq[qp];
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+		xdp_return_frame(xdpf_sent);
+
+	return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+}
+
+static int virtnet_xdp_xmit(struct net_device *dev,
+			    int n, struct xdp_frame **frames)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct receive_queue *rq = vi->rq;
+	struct xdp_frame *xdpf_sent;
 	struct bpf_prog *xdp_prog;
+	struct send_queue *sq;
+	unsigned int len;
+	unsigned int qp;
+	int drops = 0;
+	int err;
+	int i;
+
+	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+	sq = &vi->sq[qp];
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
 	 * indicate XDP resources have been successfully allocated.
@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (!xdp_prog)
 		return -ENXIO;
 
-	return __virtnet_xdp_xmit(vi, xdpf);
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+		xdp_return_frame(xdpf_sent);
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+
+		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
+		if (err) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+	return n - drops;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
-			err = __virtnet_xdp_xmit(vi, xdpf);
+			err = __virtnet_xdp_tx_xmit(vi, xdpf);
 			if (unlikely(err)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
-			err = __virtnet_xdp_xmit(vi, xdpf);
+			err = __virtnet_xdp_tx_xmit(vi, xdpf);
 			if (unlikely(err)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 03ed492c4e14..debdb6286170 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1185,9 +1185,13 @@ struct dev_ifalias {
  *	This function is used to set or query state related to XDP on the
  *	netdevice and manage BPF offload. See definition of
  *	enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- *	This function is used to submit a XDP packet for transmit on a
- *	netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ *	This function is used to submit @n XDP packets for transmit on a
+ *	netdevice. Returns number of frames successfully transmitted, frames
+ *	that got dropped are freed/returned via xdp_return_frame().
+ *	A negative return value means a general error invoking the ndo:
+ *	no frames were xmit'ed and the core caller will free all frames.
+ *	TODO: Consider adding a flag to allow sending a flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  *	This function is used to inform the driver to flush a particular
  *	xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1375,8 +1379,8 @@ struct net_device_ops {
 						       int needed_headroom);
 	int			(*ndo_bpf)(struct net_device *dev,
 					   struct netdev_bpf *bpf);
-	int			(*ndo_xdp_xmit)(struct net_device *dev,
-						struct xdp_frame *xdp);
+	int			(*ndo_xdp_xmit)(struct net_device *dev, int n,
+						struct xdp_frame **xdp);
 	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a9cd5c93dd2b..77908311ec98 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -232,24 +232,31 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 		prefetch(xdpf);
 	}
 
-	for (i = 0; i < bq->count; i++) {
-		struct xdp_frame *xdpf = bq->q[i];
-		int err;
-
-		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
-		if (err) {
-			drops++;
-			xdp_return_frame_rx_napi(xdpf);
-		} else {
-			sent++;
-		}
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
+	if (sent < 0) {
+		sent = 0;
+		goto error;
 	}
+	drops = bq->count - sent;
+out:
 	bq->count = 0;
 
 	trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
 			      sent, drops, bq->dev_rx, dev);
 	bq->dev_rx = NULL;
 	return 0;
+error:
+	/* If ndo_xdp_xmit fails with an errno, no frames have been
+	 * xmit'ed and it's our responsibility to free them all.
+	 */
+	for (i = 0; i < bq->count; i++) {
+		struct xdp_frame *xdpf = bq->q[i];
+
+		/* RX path under NAPI protection, can return frames faster */
+		xdp_return_frame_rx_napi(xdpf);
+		drops++;
+	}
+	goto out;
 }
 
 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
diff --git a/net/core/filter.c b/net/core/filter.c
index 36cf2f87d742..1d75f9322275 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3039,7 +3039,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
 			u32 index)
 {
 	struct xdp_frame *xdpf;
-	int err;
+	int sent;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit) {
 		return -EOPNOTSUPP;
@@ -3049,9 +3049,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
-	if (err)
-		return err;
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+	if (sent <= 0)
+		return sent;
 	dev->netdev_ops->ndo_xdp_flush(dev);
 	return 0;
 }

^ permalink raw reply related

* [bpf-next V5 PATCH 7/8] xdp/trace: extend tracepoint in devmap with an err
From: Jesper Dangaard Brouer @ 2018-05-24 14:46 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, BjörnTöpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

Extending tracepoint xdp:xdp_devmap_xmit in devmap with an err code
allows people to more easily identify the reason why the ndo_xdp_xmit
call to a given driver is failing.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/trace/events/xdp.h |   10 ++++++----
 kernel/bpf/devmap.c        |    5 +++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 2e9ef0650144..1ecf4c67fcf7 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -234,9 +234,9 @@ TRACE_EVENT(xdp_devmap_xmit,
 	TP_PROTO(const struct bpf_map *map, u32 map_index,
 		 int sent, int drops,
 		 const struct net_device *from_dev,
-		 const struct net_device *to_dev),
+		 const struct net_device *to_dev, int err),
 
-	TP_ARGS(map, map_index, sent, drops, from_dev, to_dev),
+	TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
 
 	TP_STRUCT__entry(
 		__field(int, map_id)
@@ -246,6 +246,7 @@ TRACE_EVENT(xdp_devmap_xmit,
 		__field(int, sent)
 		__field(int, from_ifindex)
 		__field(int, to_ifindex)
+		__field(int, err)
 	),
 
 	TP_fast_assign(
@@ -256,16 +257,17 @@ TRACE_EVENT(xdp_devmap_xmit,
 		__entry->sent		= sent;
 		__entry->from_ifindex	= from_dev->ifindex;
 		__entry->to_ifindex	= to_dev->ifindex;
+		__entry->err		= err;
 	),
 
 	TP_printk("ndo_xdp_xmit"
 		  " map_id=%d map_index=%d action=%s"
 		  " sent=%d drops=%d"
-		  " from_ifindex=%d to_ifindex=%d",
+		  " from_ifindex=%d to_ifindex=%d err=%d",
 		  __entry->map_id, __entry->map_index,
 		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
 		  __entry->sent, __entry->drops,
-		  __entry->from_ifindex, __entry->to_ifindex)
+		  __entry->from_ifindex, __entry->to_ifindex, __entry->err)
 );
 
 #endif /* _TRACE_XDP_H */
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 77908311ec98..ae16d0c373ef 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -220,7 +220,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 			 struct xdp_bulk_queue *bq)
 {
 	struct net_device *dev = obj->dev;
-	int sent = 0, drops = 0;
+	int sent = 0, drops = 0, err = 0;
 	int i;
 
 	if (unlikely(!bq->count))
@@ -234,6 +234,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 
 	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
 	if (sent < 0) {
+		err = sent;
 		sent = 0;
 		goto error;
 	}
@@ -242,7 +243,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
 	bq->count = 0;
 
 	trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
-			      sent, drops, bq->dev_rx, dev);
+			      sent, drops, bq->dev_rx, dev, err);
 	bq->dev_rx = NULL;
 	return 0;
 error:

^ permalink raw reply related

* [bpf-next V5 PATCH 8/8] samples/bpf: xdp_monitor use err code from tracepoint xdp:xdp_devmap_xmit
From: Jesper Dangaard Brouer @ 2018-05-24 14:46 UTC (permalink / raw)
  To: netdev, Daniel Borkmann, Alexei Starovoitov,
	Jesper Dangaard Brouer
  Cc: Christoph Hellwig, BjörnTöpel, John Fastabend,
	Magnus Karlsson, makita.toshiaki
In-Reply-To: <152717306303.4777.4205616217877503311.stgit@firesoul>

Update xdp_monitor to use the recently added err code introduced
in tracepoint xdp:xdp_devmap_xmit, to show if the drop count is
caused by a general driver delivery problem.  Other kinds of drops
will likely just be more normal TX space issues.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 samples/bpf/xdp_monitor_kern.c |   10 ++++++++++
 samples/bpf/xdp_monitor_user.c |   35 ++++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 2854aa0665ea..ad10fe700d7d 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -125,6 +125,7 @@ struct datarec {
 	u64 processed;
 	u64 dropped;
 	u64 info;
+	u64 err;
 };
 #define MAX_CPUS 64
 
@@ -228,6 +229,7 @@ struct devmap_xmit_ctx {
 	int sent;		//	offset:24; size:4; signed:1;
 	int from_ifindex;	//	offset:28; size:4; signed:1;
 	int to_ifindex;		//	offset:32; size:4; signed:1;
+	int err;		//	offset:36; size:4; signed:1;
 };
 
 SEC("tracepoint/xdp/xdp_devmap_xmit")
@@ -245,5 +247,13 @@ int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
 	/* Record bulk events, then userspace can calc average bulk size */
 	rec->info += 1;
 
+	/* Record error cases, where no frames were sent */
+	if (ctx->err)
+		rec->err++;
+
+	/* Catch API error: drv ndo_xdp_xmit sent more than count */
+	if (ctx->drops < 0)
+		rec->err++;
+
 	return 1;
 }
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index 7e18a454924c..dd558cbb2309 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -117,6 +117,7 @@ struct datarec {
 	__u64 processed;
 	__u64 dropped;
 	__u64 info;
+	__u64 err;
 };
 #define MAX_CPUS 64
 
@@ -152,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
 	__u64 sum_processed = 0;
 	__u64 sum_dropped = 0;
 	__u64 sum_info = 0;
+	__u64 sum_err = 0;
 	int i;
 
 	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
@@ -170,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
 		sum_dropped        += values[i].dropped;
 		rec->cpu[i].info = values[i].info;
 		sum_info        += values[i].info;
+		rec->cpu[i].err = values[i].err;
+		sum_err        += values[i].err;
 	}
 	rec->total.processed = sum_processed;
 	rec->total.dropped   = sum_dropped;
 	rec->total.info      = sum_info;
+	rec->total.err       = sum_err;
 	return true;
 }
 
@@ -274,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period)
 	return pps;
 }
 
+static double calc_err(struct datarec *r, struct datarec *p, double period)
+{
+	__u64 packets = 0;
+	double pps = 0;
+
+	if (period > 0) {
+		packets = r->err - p->err;
+		pps = packets / period;
+	}
+	return pps;
+}
+
 static void stats_print(struct stats_record *stats_rec,
 			struct stats_record *stats_prev,
 			bool err_only)
@@ -412,11 +429,12 @@ static void stats_print(struct stats_record *stats_rec,
 
 	/* devmap ndo_xdp_xmit stats */
 	{
-		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s\n";
-		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s\n";
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
 		struct record *rec, *prev;
-		double drop, info;
+		double drop, info, err;
 		char *i_str = "";
+		char *err_str = "";
 
 		rec  =  &stats_rec->xdp_devmap_xmit;
 		prev = &stats_prev->xdp_devmap_xmit;
@@ -428,22 +446,29 @@ static void stats_print(struct stats_record *stats_rec,
 			pps  = calc_pps(r, p, t);
 			drop = calc_drop(r, p, t);
 			info = calc_info(r, p, t);
+			err  = calc_err(r, p, t);
 			if (info > 0) {
 				i_str = "bulk-average";
 				info = (pps+drop) / info; /* calc avg bulk */
 			}
+			if (err > 0)
+				err_str = "drv-err";
 			if (pps > 0 || drop > 0)
 				printf(fmt1, "devmap-xmit",
-				       i, pps, drop, info, i_str);
+				       i, pps, drop, info, i_str, err_str);
 		}
 		pps = calc_pps(&rec->total, &prev->total, t);
 		drop = calc_drop(&rec->total, &prev->total, t);
 		info = calc_info(&rec->total, &prev->total, t);
+		err  = calc_err(&rec->total, &prev->total, t);
 		if (info > 0) {
 			i_str = "bulk-average";
 			info = (pps+drop) / info; /* calc avg bulk */
 		}
-		printf(fmt2, "devmap-xmit", "total", pps, drop, info, i_str);
+		if (err > 0)
+			err_str = "drv-err";
+		printf(fmt2, "devmap-xmit", "total", pps, drop,
+		       info, i_str, err_str);
 	}
 
 	printf("\n");

^ permalink raw reply related

* Re: [PATCH 0/4] RFC CPSW switchdev mode
From: Andrew Lunn @ 2018-05-24 14:54 UTC (permalink / raw)
  To: Ilias Apalodimas
  Cc: Ivan Vecera, Jiri Pirko, netdev, grygorii.strashko,
	ivan.khoronzhuk, nsekhar, francois.ozog, yogeshs, spatton
In-Reply-To: <20180524140831.GA16793@apalos>

> There's configuration needs from customers adding or not adding a VLAN to the
> CPU port. In my configuration examples for instance, if the cpu port is not
> added to the bridge, you cannot get an ip address on it. 

If you cannot get an IP address, it is plain broken. The whole idea is
that switch port interfaces are just linux interfaces. A linux
interface which cannot get an IP address is broken.

> Similar cases exist for customers on adding MDBs as far as i know. So they want
> the "customer facing ports" to have the MDBs present but not the cpu port.

That I can understand. And it should actually work now with
switchdev. It performs IGMP snooping, and if there is nothing joining
the group on the CPU, it won't add an MDB entry to forward traffic to
the CPU.

> Adding a cpu port that cannot transmit or receive traffic is a bit "weird"

And how is it supposed to send BPDUs? STP is going to be broken....

    Andrew

^ permalink raw reply

* Oops: 0000 [#1] SMP in skb_release_data, openvswitch related
From: Hans van Kranenburg @ 2018-05-24 14:49 UTC (permalink / raw)
  To: netdev, dev; +Cc: Kranenburg, Hans van, Eric Dumazet, 899044

[-- Attachment #1: Type: text/plain, Size: 4288 bytes --]

To: netdev, dev@openvswitch
Cc: Eric Dumazet (author of ff04a771ad), debian bug

Hi,

As follow-up to my bug report at Debian [0], I'm trying to do bug triage
and find out more. I'm not the expert here, but anything could help, and
it's an opportunity to learn things.

I'm observing the attached errors ('general protection fault: 0000 [#1]
SMP' and 'BUG: unable to handle kernel paging request') on machines that
are Xen dom0 and running a 4.9.88 Debian Stretch kernel as dom0 kernel.
The errors have been happening a few times in the last few weeks. It
started after upgrading them from Jessie and 3.16 kernel to Stretch with
4.9 kernel.

The traces printed look very much alike every time.

If I look up the listed address, I get:

-$ addr2line -e /usr/lib/debug/boot/vmlinux-4.9.0-6-amd64 -i -a
ffffffff814f5c7d
0xffffffff814f5c7d
./debian/build/build_amd64_none_amd64/./include/linux/compiler.h:243
(discriminator 3)
./debian/build/build_amd64_none_amd64/./include/linux/page-flags.h:143
(discriminator 3)
./debian/build/build_amd64_none_amd64/./include/linux/mm.h:779
(discriminator 3)
./debian/build/build_amd64_none_amd64/./include/linux/skbuff.h:2592
(discriminator 3)
./debian/build/build_amd64_none_amd64/./net/core/skbuff.c:594
(discriminator 3)

 583 static void skb_release_data(struct sk_buff *skb)
 584 {
 585         struct skb_shared_info *shinfo = skb_shinfo(skb);
 586         int i;
 587
 588         if (skb->cloned &&
 589             atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT)
+ 1 : 1,
 590                               &shinfo->dataref))
 591                 return;
 592
 593         for (i = 0; i < shinfo->nr_frags; i++)
 594   ----->        __skb_frag_unref(&shinfo->frags[i]);    <------
 595
 596         /*
 597          * If skb buf is from userspace, we need to notify the caller
 598          * the lower device DMA has done;
 599          */
 600         if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
 601                 struct ubuf_info *uarg;
 602
 603                 uarg = shinfo->destructor_arg;
 604                 if (uarg->callback)
 605                         uarg->callback(uarg, true);
 606         }
 607
 608         if (shinfo->frag_list)
 609                 kfree_skb_list(shinfo->frag_list);
 610
 611         skb_free_head(skb);
 612 }

The most recent (well, from 2014) biggest change in this area is...

commit ff04a771ad25fc9ba91690e73465b4d34b6bf8b3
Author: Eric Dumazet <edumazet@google.com>
Date:   Tue Sep 23 18:39:30 2014 -0700

    net : optimize skb_release_data()

...which is not present in the 3.16.y kernel that Debian Jessie still
uses, and which does not hit this problem (however, also using older
openvswitch userspace components).

Other changes in this area mention zero copy IO, which sounds like
something openvswitch could be using.

-- background: openvswitch usage --

For networking between domUs and the outside world, we use openvswitch.

After such an error happens:
* The number of "flows" in the kernel quickly rises to the limit,
10000, as seen in the output of ovs-dpctl show.
* Network traffic that should flow through the openvswitch bridge starts
disappearing in a seemingly random way (probably because it can't handle
new traffic flows).
* The memory usage of the userspace ovs-vswitchd starts growing quickly.
* Many of the ovs commands, such as adding or removing an interface or
bridge, hang.

After a restart of the openvswitch-switch service, and fixing up a bunch
of configuration of connected interfaces, functionality is restored.

While most of the symptoms seem related to userspace openvswitch
processes, the cause of it all seems to be in the kernel, while the
userspace ovs-vswitchd process is receiving a network packet?

-- reproducer --

I don't have a reliable reproducer yet, except for waiting days or weeks
until it randomly happens somewhere. There's no sign of unusual amounts
of traffic / load etc when it happens.

An idea I can come up with is building a semi-random udp packet generator
to start stressing the code path from kernel to ovs-vswitchd.

If I succeed reproducing, I can start trying other kernels or changes.

Please advise what else I could do to help resolve this issue.

Thanks,
Regards,

Hans van Kranenburg

[0] https://bugs.debian.org/899044

[-- Attachment #2: kernel-errors.txt --]
[-- Type: text/plain, Size: 20268 bytes --]

May  4 08:23:03 altair kernel: [83978.662075] BUG: unable to handle kernel paging request at 000000030000001f
May  4 08:23:03 altair kernel: [83978.665887] IP: [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May  4 08:23:03 altair kernel: [83978.669837] PGD 0 
May  4 08:23:03 altair kernel: [83978.669882] 
May  4 08:23:03 altair kernel: [83978.673589] Oops: 0000 [#1] SMP
May  4 08:23:03 altair kernel: [83978.677281] Modules linked in: cls_u32 sch_ingress act_mirred sch_fq_codel ifb xt_mark sch_htb xt_physdev br_netfilter bridge stp llc xen_netback xen_blkback algif_skcipher af_alg dm_service_time binfmt_misc xen_gntdev xen_evtchn openvswitch nf_nat_ipv6 libcrc32c xenfs xen_privcmd ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6table_mangle ip6table_raw ip6_tables ipt_REJECT nf_reject_ipv4 xt_tcpudp xt_owner xt_multiport xt_conntrack iptable_filter iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_raw dm_crypt intel_powerclamp crct10dif_pclmul crc32_pclmul iTCO_wdt iTCO_vendor_support ghash_clmulni_intel pcspkr serio_raw joydev evdev amdkfd radeon ttm drm_kms_helper drm i2c_algo_bit lpc_ich mfd_core i7core_edac hpilo
May  4 08:23:03 altair kernel: [83978.701936]  sg ipmi_si hpwdt edac_core ipmi_msghandler acpi_power_meter button shpchp dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 ext4 crc16 jbd2 fscrypto ecb mbcache btrfs crc32c_generic xor raid6_pq mlx4_en ptp pps_core hid_generic usbhid hid sd_mod crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd psmouse ehci_pci uhci_hcd ehci_hcd usbcore usb_common hpsa scsi_transport_sas bnx2 mlx4_core devlink scsi_mod thermal
May  4 08:23:03 altair kernel: [83978.724406] CPU: 1 PID: 1486 Comm: revalidator7 Not tainted 4.9.0-6-amd64 #1 Debian 4.9.88-1
May  4 08:23:03 altair kernel: [83978.729139] Hardware name: HP ProLiant DL360 G7, BIOS P68 08/16/2015
May  4 08:23:03 altair kernel: [83978.733958] task: ffff880119e1ee80 task.stack: ffffc90042764000
May  4 08:23:03 altair kernel: [83978.738724] RIP: e030:[<ffffffff814f5c7d>]  [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May  4 08:23:03 altair kernel: [83978.743560] RSP: e02b:ffffc90042767c78  EFLAGS: 00010206
May  4 08:23:03 altair kernel: [83978.748352] RAX: 0000000000000050 RBX: 00000002ffffffff RCX: ffffffff81ce0f40
May  4 08:23:03 altair kernel: [83978.753116] RDX: ffffffffffffffff RSI: ffff8800cc998900 RDI: ffff8800cc998900
May  4 08:23:03 altair kernel: [83978.757867] RBP: ffff8800cc998900 R08: ffff880123c00000 R09: ffff88011f220000
May  4 08:23:03 altair kernel: [83978.762598] R10: ffff8800cc998900 R11: ffff880119e10280 R12: 0000000000000002
May  4 08:23:03 altair kernel: [83978.767321] R13: ffff88011f227ec0 R14: ffff88011dea2800 R15: 0000000000000000
May  4 08:23:03 altair kernel: [83978.772000] FS:  00007fc1656cc700(0000) GS:ffff880128240000(0000) knlGS:0000000000000000
May  4 08:23:03 altair kernel: [83978.776671] CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
May  4 08:23:03 altair kernel: [83978.781355] CR2: 000000030000001f CR3: 00000001212b1000 CR4: 0000000000002660
May  4 08:23:03 altair kernel: [83978.786135] Stack:
May  4 08:23:03 altair kernel: [83978.790841]  ffff880120a28000 ffff8800cc998900 ffffc90042767ec0 0000000000007ea4
May  4 08:23:03 altair kernel: [83978.795898]  ffffffff814f6267 ffff880120a28000 ffff8800cc998900 ffffffff814fcc91
May  4 08:23:03 altair kernel: [83978.800806]  ffff880120a28000 ffffffff8153f2df ffffc90000000000 ffff8800cc998900
May  4 08:23:03 altair kernel: [83978.805723] Call Trace:
May  4 08:23:03 altair kernel: [83978.810654]  [<ffffffff814f6267>] ? consume_skb+0x27/0x80
May  4 08:23:03 altair kernel: [83978.815626]  [<ffffffff814fcc91>] ? skb_free_datagram+0x11/0x40
May  4 08:23:03 altair kernel: [83978.820545]  [<ffffffff8153f2df>] ? netlink_recvmsg+0x19f/0x440
May  4 08:23:03 altair kernel: [83978.825426]  [<ffffffff814ed4ca>] ? ___sys_recvmsg+0xda/0x1f0
May  4 08:23:03 altair kernel: [83978.830273]  [<ffffffff812237fb>] ? file_update_time+0xcb/0x110
May  4 08:23:03 altair kernel: [83978.835058]  [<ffffffff8120fbeb>] ? pipe_write+0x29b/0x3e0
May  4 08:23:03 altair kernel: [83978.839800]  [<ffffffff812066b0>] ? new_sync_write+0xe0/0x130
May  4 08:23:03 altair kernel: [83978.844502]  [<ffffffff814edf4e>] ? __sys_recvmsg+0x4e/0x90
May  4 08:23:03 altair kernel: [83978.849161]  [<ffffffff81003b7d>] ? do_syscall_64+0x8d/0xf0
May  4 08:23:03 altair kernel: [83978.853779]  [<ffffffff8161244e>] ? entry_SYSCALL_64_after_swapgs+0x58/0xc6
May  4 08:23:03 altair kernel: [83978.858397] Code: 03 48 c1 e8 37 83 e0 07 83 f8 04 74 49 41 0f b6 45 00 41 83 c4 01 44 39 e0 7e 51 49 63 c4 48 83 c0 03 48 c1 e0 04 49 8b 5c 05 00 <48> 8b 43 20 48 8d 50 ff a8 01 48 0f 45 da f0 ff 4b 1c 75 bf 48 
May  4 08:23:03 altair kernel: [83978.868227] RIP  [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May  4 08:23:03 altair kernel: [83978.873017]  RSP <ffffc90042767c78>
May  4 08:23:03 altair kernel: [83978.877746


May  4 22:00:22 sirius kernel: [1999361.378086] BUG: unable to handle kernel NULL pointer dereference at 00000000000001e0
May  4 22:00:22 sirius kernel: [1999361.381804] IP: [<ffffffff814f4c7d>] skb_release_data+0x8d/0x110
May  4 22:00:22 sirius kernel: [1999361.385492] PGD 0 
May  4 22:00:22 sirius kernel: [1999361.385535] 
May  4 22:00:22 sirius kernel: [1999361.389145] Oops: 0000 [#1] SMP
May  4 22:00:22 sirius kernel: [1999361.392725] Modules linked in: xt_physdev br_netfilter bridge stp llc xen_netback xen_blkback algif_skcipher af_alg dm_service_time binfmt_misc openvswitch nf_nat_ipv6 libcrc32c xen_gntdev xen_evtchn xenfs xen_privcmd ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6table_mangle ip6table_raw ip6_tables ipt_REJECT nf_reject_ipv4 xt_tcpudp xt_owner xt_multiport xt_conntrack iptable_filter iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_raw dm_crypt intel_powerclamp crct10dif_pclmul crc32_pclmul amdkfd iTCO_wdt evdev joydev iTCO_vendor_support ghash_clmulni_intel radeon ttm serio_raw pcspkr drm_kms_helper drm i2c_algo_bit sg i7core_edac lpc_ich ipmi_si acpi_power_meter hpilo hpwdt mfd_core edac_core ipmi_msghandler button
May  4 22:00:22 sirius kernel: [1999361.416634]  shpchp dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 ext4 crc16 jbd2 fscrypto ecb mbcache btrfs crc32c_generic xor raid6_pq mlx4_en ptp pps_core hid_generic usbhid hid sd_mod crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd psmouse ehci_pci uhci_hcd ehci_hcd usbcore usb_common mlx4_core hpsa scsi_transport_sas bnx2 devlink scsi_mod thermal
May  4 22:00:22 sirius kernel: [1999361.438322] CPU: 2 PID: 1400 Comm: revalidator9 Not tainted 4.9.0-6-amd64 #1 Debian 4.9.82-1+deb9u3
May  4 22:00:22 sirius kernel: [1999361.442773] Hardware name: HP ProLiant DL360 G7, BIOS P68 08/16/2015
May  4 22:00:22 sirius kernel: [1999361.447219] task: ffff880111c58540 task.stack: ffffc90041bcc000
May  4 22:00:22 sirius kernel: [1999361.451796] RIP: e030:[<ffffffff814f4c7d>]  [<ffffffff814f4c7d>] skb_release_data+0x8d/0x110
May  4 22:00:22 sirius kernel: [1999361.456294] RSP: e02b:ffffc90041bcfc78  EFLAGS: 00010206
May  4 22:00:22 sirius kernel: [1999361.460758] RAX: 0000000000000030 RBX: 00000000000001c0 RCX: ffffffff81ce0e00
May  4 22:00:22 sirius kernel: [1999361.465261] RDX: 0000000000008100 RSI: ffff880118a94f00 RDI: ffff880118a94f00
May  4 22:00:22 sirius kernel: [1999361.469724] RBP: ffff880118a94f00 R08: ffff88011bc00000 R09: ffff8800b0218000
May  4 22:00:22 sirius kernel: [1999361.474230] R10: ffff880118a94f00 R11: ffff880111c50240 R12: 0000000000000000
May  4 22:00:22 sirius kernel: [1999361.478710] R13: ffff8800b021fec0 R14: ffff8800b8356a40 R15: 0000000000000000
May  4 22:00:22 sirius kernel: [1999361.483220] FS:  00007faa54946700(0000) GS:ffff880120280000(0000) knlGS:0000000000000000
May  4 22:00:22 sirius kernel: [1999361.487736] CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
May  4 22:00:22 sirius kernel: [1999361.492181] CR2: 00000000000001e0 CR3: 0000000119ce9000 CR4: 0000000000002660
May  4 22:00:22 sirius kernel: [1999361.496661] Stack:
May  4 22:00:22 sirius kernel: [1999361.501036]  ffff8801190a2800 ffff880118a94f00 ffffc90041bcfec0 0000000000007eac
May  4 22:00:22 sirius kernel: [1999361.505470]  ffffffff814f5267 ffff8801190a2800 ffff880118a94f00 ffffffff814fbc91
May  4 22:00:22 sirius kernel: [1999361.509844]  ffff8801190a2800 ffffffff8153e2bf ffffc90000000000 ffff880118a94f00
May  4 22:00:22 sirius kernel: [1999361.514213] Call Trace:
May  4 22:00:22 sirius kernel: [1999361.518499]  [<ffffffff814f5267>] ? consume_skb+0x27/0x80
May  4 22:00:22 sirius kernel: [1999361.522818]  [<ffffffff814fbc91>] ? skb_free_datagram+0x11/0x40
May  4 22:00:22 sirius kernel: [1999361.527109]  [<ffffffff8153e2bf>] ? netlink_recvmsg+0x19f/0x440
May  4 22:00:22 sirius kernel: [1999361.531314]  [<ffffffff814ec4ca>] ? ___sys_recvmsg+0xda/0x1f0
May  4 22:00:22 sirius kernel: [1999361.535488]  [<ffffffff812221ab>] ? file_update_time+0xcb/0x110
May  4 22:00:22 sirius kernel: [1999361.539626]  [<ffffffff8120e5cb>] ? pipe_write+0x29b/0x3e0
May  4 22:00:22 sirius kernel: [1999361.543790]  [<ffffffff812050a0>] ? new_sync_write+0xe0/0x130
May  4 22:00:22 sirius kernel: [1999361.547989]  [<ffffffff814ecf4e>] ? __sys_recvmsg+0x4e/0x90
May  4 22:00:22 sirius kernel: [1999361.552218]  [<ffffffff81003b7f>] ? do_syscall_64+0x8f/0xf0
May  4 22:00:22 sirius kernel: [1999361.556467]  [<ffffffff816113b8>] ? entry_SYSCALL_64_after_swapgs+0x42/0xb0
May  4 22:00:22 sirius kernel: [1999361.560791] Code: 03 48 c1 e8 37 83 e0 07 83 f8 04 74 49 41 0f b6 45 00 41 83 c4 01 44 39 e0 7e 51 49 63 c4 48 83 c0 03 48 c1 e0 04 49 8b 5c 05 00 <48> 8b 43 20 48 8d 50 ff a8 01 48 0f 45 da f0 ff 4b 1c 75 bf 48 
May  4 22:00:22 sirius kernel: [1999361.570202] RIP  [<ffffffff814f4c7d>] skb_release_data+0x8d/0x110
May  4 22:00:22 sirius kernel: [1999361.575033]  RSP <ffffc90041bcfc78>
May  4 22:00:22 sirius kernel: [1999361.579731] CR2: 00000000000001e0
May  4 22:00:22 sirius kernel: [1999361.599233] ---[ end trace de6345fc470c5362 ]---



May 18 13:49:26 omega kernel: [1213243.942643] general protection fault: 0000 [#1] SMP
May 18 13:49:26 omega kernel: [1213243.946704] Modules linked in: xt_physdev br_netfilter bridge stp llc xen_netback xen_blkback algif_skcipher af_alg dm_service_time xen_gntdev openvswitch xen_evtchn nf_nat_ipv6 libcrc32c xenfs xen_privcmd ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6table_mangle ip6table_raw ip6_tables ipt_REJECT nf_reject_ipv4 xt_tcpudp xt_owner xt_multiport xt_conntrack iptable_filter iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_raw dm_crypt amdkfd radeon intel_powerclamp crct10dif_pclmul iTCO_wdt crc32_pclmul iTCO_vendor_support ttm ghash_clmulni_intel hpwdt pcspkr drm_kms_helper drm serio_raw evdev i2c_algo_bit joydev sg hpilo lpc_ich mfd_core i7core_edac ipmi_si edac_core ipmi_msghandler acpi_power_meter shpchp button dm_multipath
May 18 13:49:26 omega kernel: [1213243.973478]  dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 ext4 crc16 jbd2 fscrypto ecb mbcache btrfs crc32c_generic xor raid6_pq mlx4_en ptp pps_core hid_generic usbhid hid sd_mod crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd psmouse uhci_hcd ehci_pci ehci_hcd usbcore usb_common hpsa bnx2 mlx4_core scsi_transport_sas devlink scsi_mod thermal
May 18 13:49:26 omega kernel: [1213243.997290] CPU: 2 PID: 1582 Comm: revalidator9 Not tainted 4.9.0-6-amd64 #1 Debian 4.9.88-1
May 18 13:49:26 omega kernel: [1213244.002200] Hardware name: HP ProLiant DL360 G7, BIOS P68 08/16/2015
May 18 13:49:26 omega kernel: [1213244.007157] task: ffff8801186caf00 task.stack: ffffc90041b8c000
May 18 13:49:26 omega kernel: [1213244.012040] RIP: e030:[<ffffffff814f5c7d>]  [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May 18 13:49:26 omega kernel: [1213244.016957] RSP: e02b:ffffc90041b8fc78  EFLAGS: 00010206
May 18 13:49:26 omega kernel: [1213244.021783] RAX: 0000000000000030 RBX: 290008a753b675a9 RCX: ffffffff81ce0f40
May 18 13:49:26 omega kernel: [1213244.026673] RDX: 0000000000008100 RSI: ffff88011a2c6200 RDI: ffff88011a2c6200
May 18 13:49:26 omega kernel: [1213244.031596] RBP: ffff88011a2c6200 R08: ffff88011bc00000 R09: ffff88011aa70000
May 18 13:49:26 omega kernel: [1213244.036422] R10: ffff88011a2c6200 R11: ffff8801186c0200 R12: 0000000000000000
May 18 13:49:26 omega kernel: [1213244.041267] R13: ffff88011aa77ec0 R14: ffff8801199da7c0 R15: 0000000000000000
May 18 13:49:26 omega kernel: [1213244.046055] FS:  00007fe5f35e2700(0000) GS:ffff880120280000(0000) knlGS:0000000000000000
May 18 13:49:26 omega kernel: [1213244.050785] CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
May 18 13:49:26 omega kernel: [1213244.055488] CR2: 00007fe579f4f059 CR3: 0000000117428000 CR4: 0000000000002660
May 18 13:49:26 omega kernel: [1213244.060232] Stack:
May 18 13:49:26 omega kernel: [1213244.064896]  ffff880117588800 ffff88011a2c6200 ffffc90041b8fec0 0000000000007e94
May 18 13:49:26 omega kernel: [1213244.069725]  ffffffff814f6267 ffff880117588800 ffff88011a2c6200 ffffffff814fcc91
May 18 13:49:26 omega kernel: [1213244.074552]  ffff880117588800 ffffffff8153f2df ffffc90000000000 ffff88011a2c6200
May 18 13:49:26 omega kernel: [1213244.079377] Call Trace:
May 18 13:49:26 omega kernel: [1213244.084123]  [<ffffffff814f6267>] ? consume_skb+0x27/0x80
May 18 13:49:26 omega kernel: [1213244.089047]  [<ffffffff814fcc91>] ? skb_free_datagram+0x11/0x40
May 18 13:49:26 omega kernel: [1213244.093728]  [<ffffffff8153f2df>] ? netlink_recvmsg+0x19f/0x440
May 18 13:49:26 omega kernel: [1213244.098359]  [<ffffffff814ed4ca>] ? ___sys_recvmsg+0xda/0x1f0
May 18 13:49:26 omega kernel: [1213244.102962]  [<ffffffff812237fb>] ? file_update_time+0xcb/0x110
May 18 13:49:26 omega kernel: [1213244.107530]  [<ffffffff8120fbeb>] ? pipe_write+0x29b/0x3e0
May 18 13:49:26 omega kernel: [1213244.112074]  [<ffffffff812066b0>] ? new_sync_write+0xe0/0x130
May 18 13:49:26 omega kernel: [1213244.116625]  [<ffffffff814edf4e>] ? __sys_recvmsg+0x4e/0x90
May 18 13:49:26 omega kernel: [1213244.121183]  [<ffffffff81003b7d>] ? do_syscall_64+0x8d/0xf0
May 18 13:49:26 omega kernel: [1213244.125715]  [<ffffffff8161244e>] ? entry_SYSCALL_64_after_swapgs+0x58/0xc6
May 18 13:49:26 omega kernel: [1213244.130196] Code: 03 48 c1 e8 37 83 e0 07 83 f8 04 74 49 41 0f b6 45 00 41 83 c4 01 44 39 e0 7e 51 49 63 c4 48 83 c0 03 48 c1 e0 04 49 8b 5c 05 00 <48> 8b 43 20 48 8d 50 ff a8 01 48 0f 45 da f0 ff 4b 1c 75 bf 48 
May 18 13:49:26 omega kernel: [1213244.139830] RIP  [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May 18 13:49:26 omega kernel: [1213244.144491]  RSP <ffffc90041b8fc78>
May 18 13:49:26 omega kernel: [1213244.164037] ---[ end trace c53e06696e145c33 ]---

May 22 13:10:59 sirius kernel: [1520897.781283] general protection fault: 0000 [#1] SMP
May 22 13:10:59 sirius kernel: [1520897.784780] Modules linked in: xt_physdev br_netfilter bridge stp llc xen_netback xen_blkback binfmt_misc algif_skcipher af_alg dm_service_time openvswitch nf_nat_ipv6 libcrc32c xen_gntdev xen_evtchn xenfs xen_privcmd ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6table_mangle ip6table_raw ip6_tables ipt_REJECT nf_reject_ipv4 xt_tcpudp xt_owner xt_multiport xt_conntrack iptable_filter iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_raw dm_crypt intel_powerclamp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel iTCO_wdt iTCO_vendor_support amdkfd joydev evdev radeon serio_raw pcspkr ttm drm_kms_helper drm i2c_algo_bit lpc_ich i7core_edac mfd_core sg ipmi_si hpilo hpwdt edac_core acpi_power_meter ipmi_msghandler button
May 22 13:10:59 sirius kernel: [1520897.808262]  shpchp dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 ext4 crc16 jbd2 fscrypto ecb mbcache btrfs crc32c_generic xor raid6_pq mlx4_en ptp pps_core hid_generic usbhid hid sd_mod crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd psmouse ehci_pci uhci_hcd ehci_hcd usbcore usb_common hpsa scsi_transport_sas mlx4_core bnx2 devlink scsi_mod thermal
May 22 13:10:59 sirius kernel: [1520897.829713] CPU: 3 PID: 1570 Comm: revalidator8 Not tainted 4.9.0-6-amd64 #1 Debian 4.9.88-1
May 22 13:10:59 sirius kernel: [1520897.834150] Hardware name: HP ProLiant DL360 G7, BIOS P68 08/16/2015
May 22 13:10:59 sirius kernel: [1520897.838605] task: ffff880117432400 task.stack: ffffc90041c3c000
May 22 13:10:59 sirius kernel: [1520897.843037] RIP: e030:[<ffffffff814f5c7d>]  [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May 22 13:10:59 sirius kernel: [1520897.847518] RSP: e02b:ffffc90041c3fc78  EFLAGS: 00010206
May 22 13:10:59 sirius kernel: [1520897.851953] RAX: 0000000000000030 RBX: 33efd0d4e0b4e3a6 RCX: ffffffff81ce0f40
May 22 13:10:59 sirius kernel: [1520897.856583] RDX: 0000000000008100 RSI: ffff880116496800 RDI: ffff880116496800
May 22 13:10:59 sirius kernel: [1520897.861027] RBP: ffff880116496800 R08: ffff88011bc00000 R09: ffff8800d08f8000
May 22 13:10:59 sirius kernel: [1520897.865531] R10: ffff880116496800 R11: ffff880117430200 R12: 0000000000000000
May 22 13:10:59 sirius kernel: [1520897.869988] R13: ffff8800d08ffec0 R14: ffff8800d2658040 R15: 0000000000000000
May 22 13:10:59 sirius kernel: [1520897.874478] FS:  00007fb1c173c700(0000) GS:ffff8801202c0000(0000) knlGS:0000000000000000
May 22 13:10:59 sirius kernel: [1520897.878963] CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
May 22 13:10:59 sirius kernel: [1520897.883445] CR2: 00007fa9ddc3b174 CR3: 00000001195e8000 CR4: 0000000000002660
May 22 13:10:59 sirius kernel: [1520897.887955] Stack:
May 22 13:10:59 sirius kernel: [1520897.892363]  ffff8800cfefb000 ffff880116496800 ffffc90041c3fec0 0000000000007e78
May 22 13:10:59 sirius kernel: [1520897.896907]  ffffffff814f6267 ffff8800cfefb000 ffff880116496800 ffffffff814fcc91
May 22 13:10:59 sirius kernel: [1520897.901431]  ffff8800cfefb000 ffffffff8153f2df ffffc90000000000 ffff880116496800
May 22 13:10:59 sirius kernel: [1520897.905926] Call Trace:
May 22 13:10:59 sirius kernel: [1520897.910354]  [<ffffffff814f6267>] ? consume_skb+0x27/0x80
May 22 13:10:59 sirius kernel: [1520897.914875]  [<ffffffff814fcc91>] ? skb_free_datagram+0x11/0x40
May 22 13:10:59 sirius kernel: [1520897.919414]  [<ffffffff8153f2df>] ? netlink_recvmsg+0x19f/0x440
May 22 13:10:59 sirius kernel: [1520897.923949]  [<ffffffff814ed4ca>] ? ___sys_recvmsg+0xda/0x1f0
May 22 13:10:59 sirius kernel: [1520897.928548]  [<ffffffff812237fb>] ? file_update_time+0xcb/0x110
May 22 13:10:59 sirius kernel: [1520897.933258]  [<ffffffff8120fbeb>] ? pipe_write+0x29b/0x3e0
May 22 13:10:59 sirius kernel: [1520897.937883]  [<ffffffff812066b0>] ? new_sync_write+0xe0/0x130
May 22 13:10:59 sirius kernel: [1520897.942522]  [<ffffffff814edf4e>] ? __sys_recvmsg+0x4e/0x90
May 22 13:10:59 sirius kernel: [1520897.947202]  [<ffffffff81003b7d>] ? do_syscall_64+0x8d/0xf0
May 22 13:10:59 sirius kernel: [1520897.951837]  [<ffffffff8161244e>] ? entry_SYSCALL_64_after_swapgs+0x58/0xc6
May 22 13:10:59 sirius kernel: [1520897.956458] Code: 03 48 c1 e8 37 83 e0 07 83 f8 04 74 49 41 0f b6 45 00 41 83 c4 01 44 39 e0 7e 51 49 63 c4 48 83 c0 03 48 c1 e0 04 49 8b 5c 05 00 <48> 8b 43 20 48 8d 50 ff a8 01 48 0f 45 da f0 ff 4b 1c 75 bf 48 
May 22 13:10:59 sirius kernel: [1520897.966226] RIP  [<ffffffff814f5c7d>] skb_release_data+0x8d/0x110
May 22 13:10:59 sirius kernel: [1520897.971014]  RSP <ffffc90041c3fc78>
May 22 13:10:59 sirius kernel: [1520897.990735] ---[ end trace 70c169490927f55d ]---

^ permalink raw reply

* Re: [PATCH 0/8] pull request for net-next: batman-adv 2017-05-24
From: David Miller @ 2018-05-24 15:03 UTC (permalink / raw)
  To: sw; +Cc: netdev, b.a.t.m.a.n
In-Reply-To: <20180524120300.15829-1-sw@simonwunderlich.de>

From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Thu, 24 May 2018 14:02:52 +0200

> here is our feature/cleanup pull request of batman-adv to go into net-next.
> 
> Please pull or let me know of any problem!

Pulled.

You should really remove the EXPERIMENTAL tag from the V
protocol support if you want it to be on by default.  Maybe
even remove the Kconfig knob entirely.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox