Netdev List
 help / color / mirror / Atom feed
* [PATCH RFC net-next 5/7] selftests: fib_tests: Add option to pause after each test
From: David Ahern @ 2018-05-15  2:51 UTC (permalink / raw)
  To: netdev; +Cc: Thomas.Winter, idosch, sharpd, roopa, David Ahern
In-Reply-To: <20180515025112.16647-1-dsahern@gmail.com>

Add option to pause after each test before cleanup is done. Allows
user to do manual inspection or more ad-hoc testing after each test
with the setup in tact.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/testing/selftests/net/fib_tests.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 8c99f0689efc..12b648826151 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,6 +9,7 @@ ret=0
 TESTS="unregister down carrier nexthop"
 VERBOSE=0
 PAUSE_ON_FAIL=no
+PAUSE=no
 IP="ip -netns testns"
 
 log_test()
@@ -31,6 +32,13 @@ log_test()
 			[ "$a" = "q" ] && exit 1
 		fi
 	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
 }
 
 setup()
@@ -576,6 +584,7 @@ usage: ${0##*/} OPTS
         -t <test>   Test(s) to run (default: all)
                     (options: $TESTS)
         -p          Pause on fail
+        -P          Pause after each test before cleanup
         -v          verbose mode (show commands and output)
 EOF
 }
@@ -588,6 +597,7 @@ do
 	case $o in
 		t) TESTS=$OPTARG;;
 		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
 		v) VERBOSE=$(($VERBOSE + 1));;
 		h) usage; exit 0;;
 		*) usage; exit 1;;
@@ -596,6 +606,9 @@ done
 
 PEER_CMD="ip netns exec ${PEER_NS}"
 
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
 	exit 0
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 6/7] selftests: fib_tests: Add ipv6 route add append replace tests
From: David Ahern @ 2018-05-15  2:51 UTC (permalink / raw)
  To: netdev; +Cc: Thomas.Winter, idosch, sharpd, roopa, David Ahern
In-Reply-To: <20180515025112.16647-1-dsahern@gmail.com>

Add IPv6 route tests covering add, append and replace permutations.
Assumes the ability to add a basic single path route works; this is
required for example when adding an address to an interface.

$ fib_tests.sh -t ipv6_rt

IPv6 route add / append tests
    TEST: Attempt to add duplicate route - gw                           [ OK ]
    TEST: Attempt to add duplicate route - dev only                     [ OK ]
    TEST: Attempt to add duplicate route - reject route                 [ OK ]
    TEST: Add new nexthop for existing prefix                           [ OK ]
    TEST: Append leg to existing route - gw                             [ OK ]
    TEST: Append leg to existing route - dev only                       [ OK ]
    TEST: Append leg to existing route - reject route                   [ OK ]
    TEST: Append leg to existing reject route - gw                      [ OK ]
    TEST: Append leg to existing reject route - dev only                [ OK ]
    TEST: add multipath route                                           [ OK ]
    TEST: Attempt to add duplicate multipath route                      [ OK ]
    TEST: route add with different metrics                              [ OK ]
    TEST: route delete with metric                                      [ OK ]

IPv6 route replace tests
    TEST: single path with single path                                  [ OK ]
    TEST: single path with multipath                                    [ OK ]
    TEST: single path with reject route                                 [ OK ]
    TEST: single path with single path via multipath attribute          [ OK ]
    TEST: invalid nexthop                                               [ OK ]
    TEST: single path - replace of non-existent route                   [ OK ]
    TEST: multipath with multipath                                      [ OK ]
    TEST: multipath with single path                                    [ OK ]
    TEST: multipath with single path via multipath attribute            [ OK ]
    TEST: multipath with reject route                                   [ OK ]
    TEST: multipath - invalid first nexthop                             [ OK ]
    TEST: multipath - invalid second nexthop                            [ OK ]
    TEST: multipath - replace of non-existent route                     [ OK ]

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/testing/selftests/net/fib_tests.sh | 331 ++++++++++++++++++++++++++++++-
 1 file changed, 330 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 12b648826151..20cb1d2c4e72 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,7 +6,7 @@
 
 ret=0
 
-TESTS="unregister down carrier nexthop"
+TESTS="unregister down carrier nexthop ipv6_rt"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
@@ -574,6 +574,334 @@ fib_nexthop_test()
 }
 
 ################################################################################
+# Tests on route add and replace
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP -6 ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP -6 ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP -6 ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+	add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+	local pfx="2001:db8:104::/64"
+	local expected="$1"
+	local out
+	local rc=0
+
+	out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+route_cleanup()
+{
+	$IP li del red 2>/dev/null
+	$IP li del dummy1 2>/dev/null
+	$IP li del veth1 2>/dev/null
+	$IP li del veth3 2>/dev/null
+
+	cleanup &> /dev/null
+}
+
+route_setup()
+{
+	route_cleanup
+	setup
+
+	[ "${VERBOSE}" = "1" ] && set -x
+	set -e
+
+	$IP li add red up type vrf table 101
+	$IP li add veth1 type veth peer name veth2
+	$IP li add veth3 type veth peer name veth4
+
+	$IP li set veth1 up
+	$IP li set veth3 up
+	$IP li set veth2 vrf red up
+	$IP li set veth4 vrf red up
+	$IP li add dummy1 type dummy
+	$IP li set dummy1 vrf red up
+
+	$IP -6 addr add 2001:db8:101::1/64 dev veth1
+	$IP -6 addr add 2001:db8:101::2/64 dev veth2
+	$IP -6 addr add 2001:db8:103::1/64 dev veth3
+	$IP -6 addr add 2001:db8:103::2/64 dev veth4
+	$IP -6 addr add 2001:db8:104::1/64 dev dummy1
+
+	set +ex
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv6 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# iproute2 prepend only sets NLM_F_CREATE
+	# - adds a new route; does NOT convert existing route to ECMP
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Add new nexthop for existing prefix"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Append leg to existing route - gw"
+
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
+	log_test $? 0 "Append leg to existing route - dev only"
+
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
+	log_test $? 2 "Append leg to existing route - reject route"
+
+	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+	log_test $? 2 "Append leg to existing reject route - gw"
+
+	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
+	log_test $? 2 "Append leg to existing reject route - dev only"
+
+	# insert mpath directly
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "add multipath route"
+
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "route add with different metrics"
+
+	run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		rc=$?
+	fi
+	log_test $rc 0 "route delete with metric"
+}
+
+ipv6_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route6 "nexthop via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "single path with multipath"
+
+	# single path with reject
+	#
+	add_initial_route6 "nexthop via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
+	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
+	log_test $? 0 "single path with reject route"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		log_test 0 1 "invalid nexthop"
+	else
+		check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		log_test $? 0 "invalid nexthop"
+	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+	log_test $? 2 "single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+	log_test $? 0 "multipath with multipath"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+	check_route6  "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "multipath with single path"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "multipath with single path via multipath attribute"
+
+	# multipath with reject
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
+	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
+	log_test $? 0 "multipath with reject route"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	log_test $? 2 "multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+	echo
+	echo "IPv6 route replace tests"
+
+	ipv6_rt_replace_single
+	ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+	route_setup
+
+	ipv6_rt_add
+	ipv6_rt_replace
+
+	route_cleanup
+}
+
+################################################################################
 # usage
 
 usage()
@@ -635,6 +963,7 @@ do
 	fib_down_test|down)		fib_down_test;;
 	fib_carrier_test|carrier)	fib_carrier_test;;
 	fib_nexthop_test|nexthop)	fib_nexthop_test;;
+	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 7/7] selftests: fib_tests: Add ipv4 route add append replace tests
From: David Ahern @ 2018-05-15  2:51 UTC (permalink / raw)
  To: netdev; +Cc: Thomas.Winter, idosch, sharpd, roopa, David Ahern
In-Reply-To: <20180515025112.16647-1-dsahern@gmail.com>

Add IPv4 route tests covering add, append and replace permutations.
Assumes the ability to add a basic single path route works; this is
required for example when adding an address to an interface.

$ fib_tests.sh -t ipv4_rt
IPv4 route add / append tests
    TEST: Attempt to add duplicate route - gw                           [ OK ]
    TEST: Attempt to add duplicate route - dev only                     [ OK ]
    TEST: Attempt to add duplicate route - reject route                 [ OK ]
    TEST: Add new nexthop for existing prefix                           [ OK ]
    TEST: Append leg to existing route - gw                             [ OK ]
    TEST: Append leg to existing route - dev only                       [ OK ]
    TEST: Append leg to existing route - reject route                   [ OK ]
    TEST: Append leg to existing reject route - gw                      [ OK ]
    TEST: Append leg to existing reject route - dev only                [ OK ]
    TEST: add multipath route                                           [ OK ]
    TEST: Attempt to add duplicate multipath route                      [ OK ]
    TEST: route add with different metrics                              [ OK ]
    TEST: route delete with metric                                      [ OK ]

IPv4 route replace tests
    TEST: single path with single path                                  [ OK ]
    TEST: single path with multipath                                    [ OK ]
    TEST: single path with reject route                                 [ OK ]
    TEST: single path with single path via multipath attribute          [ OK ]
    TEST: invalid nexthop                                               [ OK ]
    TEST: single path - replace of non-existent route                   [ OK ]
    TEST: multipath with multipath                                      [ OK ]
    TEST: multipath with single path                                    [ OK ]
    TEST: multipath with single path via multipath attribute            [ OK ]
    TEST: multipath with reject route                                   [ OK ]
    TEST: multipath - invalid first nexthop                             [ OK ]
    TEST: multipath - invalid second nexthop                            [ OK ]
    TEST: multipath - replace of non-existent route                     [ OK ]

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/testing/selftests/net/fib_tests.sh | 277 ++++++++++++++++++++++++++++++-
 1 file changed, 276 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 20cb1d2c4e72..19248453a2ba 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,7 +6,7 @@
 
 ret=0
 
-TESTS="unregister down carrier nexthop ipv6_rt"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
@@ -702,6 +702,12 @@ route_setup()
 	$IP -6 addr add 2001:db8:103::2/64 dev veth4
 	$IP -6 addr add 2001:db8:104::1/64 dev dummy1
 
+	$IP addr add 172.16.101.1/24 dev veth1
+	$IP addr add 172.16.101.2/24 dev veth2
+	$IP addr add 172.16.103.1/24 dev veth3
+	$IP addr add 172.16.103.2/24 dev veth4
+	$IP addr add 172.16.104.1/24 dev dummy1
+
 	set +ex
 }
 
@@ -901,6 +907,274 @@ ipv6_route_test()
 	route_cleanup
 }
 
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route()
+{
+	add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+	local pfx="172.16.104.0/24"
+	local expected="$1"
+	local out
+	local rc=0
+
+	out=$($IP ro ls match ${pfx})
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv4 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# iproute2 prepend only sets NLM_F_CREATE
+	# - adds a new route; does NOT convert existing route to ECMP
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+	log_test $? 0 "Add new nexthop for existing prefix"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append leg to existing route - gw"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append leg to existing route - dev only"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append unreachable 172.16.104.0/24"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+	log_test $? 0 "Append leg to existing route - reject route"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append leg to existing reject route - gw"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append leg to existing reject route - dev only"
+
+	# insert mpath directly
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "add multipath route"
+
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "route add with different metrics"
+
+	run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "single path with multipath"
+
+	# single path with reject
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "single path with reject route"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		log_test 0 1 "invalid nexthop"
+	else
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+		log_test $? 0 "invalid nexthop"
+	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+	log_test $? 2 "single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+	log_test $? 0 "multipath with multipath"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+	check_route  "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "multipath with single path"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+	check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "multipath with single path via multipath attribute"
+
+	# multipath with reject
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "multipath with reject route"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	log_test $? 2 "multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+	echo
+	echo "IPv4 route replace tests"
+
+	ipv4_rt_replace_single
+	ipv4_rt_replace_mpath
+}
+
+ipv4_route_test()
+{
+	route_setup
+
+	ipv4_rt_add
+	ipv4_rt_replace
+
+	route_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -964,6 +1238,7 @@ do
 	fib_carrier_test|carrier)	fib_carrier_test;;
 	fib_nexthop_test|nexthop)	fib_nexthop_test;;
 	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
+	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH] lib/rhashtable: reorder some inititalization sequences
From: David Miller @ 2018-05-15  2:52 UTC (permalink / raw)
  To: dave; +Cc: akpm, tgraf, herbert, netdev, linux-kernel, dbueso
In-Reply-To: <20180514151332.31352-1-dave@stgolabs.net>

From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 14 May 2018 08:13:32 -0700

> rhashtable_init() allocates memory at the very end of the
> call, once everything is setup; with the exception of the
> nelems parameter. However, unless the user is doing something
> bogus with params for which -EINVAL is returned, memory
> allocation is the only operation that can trigger the call
> to fail.
> 
> Thus move bucket_table_alloc() up such that we fail back to
> the caller asap, instead of doing useless checks. This is
> safe as the the table allocation isn't using the halfly
> setup 'ht' structure and bucket_table_alloc() call chain only
> ends up using the ht->nulls_base member in INIT_RHT_NULLS_HEAD.
> 
> Also move the locking initialization down to the end.
> 
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>

The user potentially "doing something bogus" is why the most
expensive part of the initialization (the memory allocation)
is done after everything else is validated.

I think it's best to keep things as-is.

^ permalink raw reply

* Re: [PATCH net-next v3 0/8] sctp: refactor sctp_outq_flush
From: David Miller @ 2018-05-15  2:57 UTC (permalink / raw)
  To: marcelo.leitner; +Cc: netdev, linux-sctp, nhorman, lucien.xin, vyasevich
In-Reply-To: <cover.1526318522.git.marcelo.leitner@gmail.com>

From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 14 May 2018 14:34:35 -0300

> Currently sctp_outq_flush does many different things and arguably
> unrelated, such as doing transport selection and outq dequeueing.
> 
> This patchset refactors it into smaller and more dedicated functions.
> The end behavior should be the same.
> 
> The next patchset will rework the function parameters.
> 
> Changes since v1:
> - fix build issues on patches 3 and 4, and updated 5 and 8 because of
>   it.
> 
> Changes since v2:
> - fixed panic if building with just up to patch 3 applied

Series applied, thanks.

^ permalink raw reply

* Re: [RFC bpf-next 06/11] bpf: Add reference tracking to verifier
From: Alexei Starovoitov @ 2018-05-15  3:04 UTC (permalink / raw)
  To: Joe Stringer; +Cc: daniel, netdev, ast, john.fastabend, kafai
In-Reply-To: <20180509210709.7201-7-joe@wand.net.nz>

On Wed, May 09, 2018 at 02:07:04PM -0700, Joe Stringer wrote:
> Allow helper functions to acquire a reference and return it into a
> register. Specific pointer types such as the PTR_TO_SOCKET will
> implicitly represent such a reference. The verifier must ensure that
> these references are released exactly once in each path through the
> program.
> 
> To achieve this, this commit assigns an id to the pointer and tracks it
> in the 'bpf_func_state', then when the function or program exits,
> verifies that all of the acquired references have been freed. When the
> pointer is passed to a function that frees the reference, it is removed
> from the 'bpf_func_state` and all existing copies of the pointer in
> registers are marked invalid.
> 
> Signed-off-by: Joe Stringer <joe@wand.net.nz>
> ---
>  include/linux/bpf_verifier.h |  18 ++-
>  kernel/bpf/verifier.c        | 295 ++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 292 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 9dcd87f1d322..8dbee360b3ec 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -104,6 +104,11 @@ struct bpf_stack_state {
>  	u8 slot_type[BPF_REG_SIZE];
>  };
>  
> +struct bpf_reference_state {
> +	int id;
> +	int insn_idx; /* allocation insn */

the insn_idx is for more verbose messages, right?
It doesn't seem to affect the safety of algorithm.
Please add a comment to clarify that.

> +};
> +
>  /* state of the program:
>   * type of all registers and stack info
>   */
> @@ -122,7 +127,9 @@ struct bpf_func_state {
>  	 */
>  	u32 subprogno;
>  
> -	/* should be second to last. See copy_func_state() */
> +	/* The following fields should be last. See copy_func_state() */
> +	int acquired_refs;
> +	struct bpf_reference_state *refs;
>  	int allocated_stack;
>  	struct bpf_stack_state *stack;
>  };
> @@ -218,11 +225,16 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
>  __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
>  					   const char *fmt, ...);
>  
> -static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
> +static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
>  {
>  	struct bpf_verifier_state *cur = env->cur_state;
>  
> -	return cur->frame[cur->curframe]->regs;
> +	return cur->frame[cur->curframe];
> +}
> +
> +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
> +{
> +	return cur_func(env)->regs;
>  }
>  
>  int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f426ebf2b6bf..92b9a5dc465a 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1,5 +1,6 @@
>  /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
>   * Copyright (c) 2016 Facebook
> + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
>   *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of version 2 of the GNU General Public
> @@ -140,6 +141,18 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
>   *
>   * After the call R0 is set to return type of the function and registers R1-R5
>   * are set to NOT_INIT to indicate that they are no longer readable.
> + *
> + * The following reference types represent a potential reference to a kernel
> + * resource which, after first being allocated, must be checked and freed by
> + * the BPF program:
> + * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
> + *
> + * When the verifier sees a helper call return a reference type, it allocates a
> + * pointer id for the reference and stores it in the current function state.
> + * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
> + * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
> + * passes through a NULL-check conditional. For the branch wherein the state is
> + * changed to CONST_IMM, the verifier releases the reference.
>   */
>  
>  /* verifier_state + insn_idx are pushed to stack when branch is encountered */
> @@ -229,7 +242,42 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
>  
>  static bool reg_type_may_be_null(enum bpf_reg_type type)
>  {
> -	return type == PTR_TO_MAP_VALUE_OR_NULL;
> +	return type == PTR_TO_MAP_VALUE_OR_NULL ||
> +	       type == PTR_TO_SOCKET_OR_NULL;
> +}
> +
> +static bool type_is_refcounted(enum bpf_reg_type type)
> +{
> +	return type == PTR_TO_SOCKET;
> +}
> +
> +static bool type_is_refcounted_or_null(enum bpf_reg_type type)
> +{
> +	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
> +}
> +
> +static bool reg_is_refcounted(const struct bpf_reg_state *reg)
> +{
> +	return type_is_refcounted(reg->type);
> +}
> +
> +static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
> +{
> +	return type_is_refcounted_or_null(reg->type);
> +}
> +
> +static bool arg_type_is_refcounted(enum bpf_arg_type type)
> +{
> +	return type == ARG_PTR_TO_SOCKET;
> +}
> +
> +/* Determine whether the function releases some resources allocated by another
> + * function call. The first reference type argument will be assumed to be
> + * released by release_reference().
> + */
> +static bool is_release_function(enum bpf_func_id func_id)
> +{
> +	return false;
>  }
>  
>  /* string representation of 'enum bpf_reg_type' */
> @@ -344,6 +392,12 @@ static void print_verifier_state(struct bpf_verifier_env *env,
>  		if (state->stack[i].slot_type[0] == STACK_ZERO)
>  			verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
>  	}
> +	if (state->acquired_refs && state->refs[0].id) {
> +		verbose(env, " refs=%d", state->refs[0].id);
> +		for (i = 1; i < state->acquired_refs; i++)
> +			if (state->refs[i].id)
> +				verbose(env, ",%d", state->refs[i].id);
> +	}
>  	verbose(env, "\n");
>  }
>  
> @@ -362,6 +416,8 @@ static int copy_##NAME##_state(struct bpf_func_state *dst,		\
>  	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
>  	return 0;							\
>  }
> +/* copy_reference_state() */
> +COPY_STATE_FN(reference, acquired_refs, refs, 1)
>  /* copy_stack_state() */
>  COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
>  #undef COPY_STATE_FN
> @@ -400,6 +456,8 @@ static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
>  	state->FIELD = new_##FIELD;					\
>  	return 0;							\
>  }
> +/* realloc_reference_state() */
> +REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
>  /* realloc_stack_state() */
>  REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
>  #undef REALLOC_STATE_FN
> @@ -411,16 +469,89 @@ REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
>   * which realloc_stack_state() copies over. It points to previous
>   * bpf_verifier_state which is never reallocated.
>   */
> -static int realloc_func_state(struct bpf_func_state *state, int size,
> -			      bool copy_old)
> +static int realloc_func_state(struct bpf_func_state *state, int stack_size,
> +			      int refs_size, bool copy_old)
>  {
> -	return realloc_stack_state(state, size, copy_old);
> +	int err = realloc_reference_state(state, refs_size, copy_old);
> +	if (err)
> +		return err;
> +	return realloc_stack_state(state, stack_size, copy_old);
> +}
> +
> +/* Acquire a pointer id from the env and update the state->refs to include
> + * this new pointer reference.
> + * On success, returns a valid pointer id to associate with the register
> + * On failure, returns a negative errno.
> + */
> +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
> +{
> +	struct bpf_func_state *state = cur_func(env);
> +	int new_ofs = state->acquired_refs;
> +	int id, err;
> +
> +	err = realloc_reference_state(state, state->acquired_refs + 1, true);
> +	if (err)
> +		return err;
> +	id = ++env->id_gen;
> +	state->refs[new_ofs].id = id;
> +	state->refs[new_ofs].insn_idx = insn_idx;

I thought that we may avoid this extra 'ref_state' array if we store
'id' into 'aux' array which is one to one to array of instructions
and avoid this expensive reallocs, but then I realized we can go
through the same instruction that returns a pointer to socket
multiple times and every time it needs to be different 'id' and
tracked indepdently, so yeah. All that infra is necessary.
Would be good to document the algorithm a bit more.

> +
> +	return id;
> +}
> +
> +/* release function corresponding to acquire_reference_state(). Idempotent. */
> +static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
> +{
> +	int i, last_idx;
> +
> +	if (!ptr_id)
> +		return 0;
> +
> +	last_idx = state->acquired_refs - 1;
> +	for (i = 0; i < state->acquired_refs; i++) {
> +		if (state->refs[i].id == ptr_id) {
> +			if (last_idx && i != last_idx)
> +				memcpy(&state->refs[i], &state->refs[last_idx],
> +				       sizeof(*state->refs));
> +			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
> +			state->acquired_refs--;
> +			return 0;
> +		}
> +	}
> +	return -EFAULT;
> +}
> +
> +/* variation on the above for cases where we expect that there must be an
> + * outstanding reference for the specified ptr_id.
> + */
> +static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
> +{
> +	struct bpf_func_state *state = cur_func(env);
> +	int err;
> +
> +	err = __release_reference_state(state, ptr_id);
> +	if (WARN_ON_ONCE(err != 0))
> +		verbose(env, "verifier internal error: can't release reference\n");
> +	return err;
> +}
> +
> +static int transfer_reference_state(struct bpf_func_state *dst,
> +				    struct bpf_func_state *src)
> +{
> +	int err = realloc_reference_state(dst, src->acquired_refs, false);
> +	if (err)
> +		return err;
> +	err = copy_reference_state(dst, src);
> +	if (err)
> +		return err;
> +	return 0;
>  }
>  
>  static void free_func_state(struct bpf_func_state *state)
>  {
>  	if (!state)
>  		return;
> +	kfree(state->refs);
>  	kfree(state->stack);
>  	kfree(state);
>  }
> @@ -446,10 +577,14 @@ static int copy_func_state(struct bpf_func_state *dst,
>  {
>  	int err;
>  
> -	err = realloc_func_state(dst, src->allocated_stack, false);
> +	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
> +				 false);
> +	if (err)
> +		return err;
> +	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
> +	err = copy_reference_state(dst, src);
>  	if (err)
>  		return err;
> -	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
>  	return copy_stack_state(dst, src);
>  }
>  
> @@ -1019,7 +1154,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
>  	enum bpf_reg_type type;
>  
>  	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
> -				 true);
> +				 state->acquired_refs, true);
>  	if (err)
>  		return err;
>  	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
> @@ -2259,10 +2394,32 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
>  	return true;
>  }
>  
> +static bool check_refcount_ok(const struct bpf_func_proto *fn)
> +{
> +	int count = 0;
> +
> +	if (arg_type_is_refcounted(fn->arg1_type))
> +		count++;
> +	if (arg_type_is_refcounted(fn->arg2_type))
> +		count++;
> +	if (arg_type_is_refcounted(fn->arg3_type))
> +		count++;
> +	if (arg_type_is_refcounted(fn->arg4_type))
> +		count++;
> +	if (arg_type_is_refcounted(fn->arg5_type))
> +		count++;
> +
> +	/* We only support one arg being unreferenced at the moment,
> +	 * which is sufficient for the helper functions we have right now.
> +	 */
> +	return count <= 1;
> +}
> +
>  static int check_func_proto(const struct bpf_func_proto *fn)
>  {
>  	return check_raw_mode_ok(fn) &&
> -	       check_arg_pair_ok(fn) ? 0 : -EINVAL;
> +	       check_arg_pair_ok(fn) &&
> +	       check_refcount_ok(fn) ? 0 : -EINVAL;
>  }
>  
>  /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
> @@ -2295,12 +2452,57 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
>  		__clear_all_pkt_pointers(env, vstate->frame[i]);
>  }
>  
> +static void release_reg_references(struct bpf_verifier_env *env,
> +				   struct bpf_func_state *state, int id)
> +{
> +	struct bpf_reg_state *regs = state->regs, *reg;
> +	int i;
> +
> +	for (i = 0; i < MAX_BPF_REG; i++)
> +		if (regs[i].id == id)
> +			mark_reg_unknown(env, regs, i);
> +
> +	for_each_spilled_reg(i, state, reg) {
> +		if (!reg)
> +			continue;
> +		if (reg_is_refcounted(reg) && reg->id == id)
> +			__mark_reg_unknown(reg);
> +	}
> +}
> +
> +/* The pointer with the specified id has released its reference to kernel
> + * resources. Identify all copies of the same pointer and clear the reference.
> + */
> +static int release_reference(struct bpf_verifier_env *env)
> +{
> +	struct bpf_verifier_state *vstate = env->cur_state;
> +	struct bpf_reg_state *regs = cur_regs(env);
> +	int i, ptr_id = 0;
> +
> +	for (i = BPF_REG_1; i < BPF_REG_6; i++) {
> +		if (reg_is_refcounted(&regs[i])) {
> +			ptr_id = regs[i].id;
> +			break;
> +		}
> +	}
> +	if (WARN_ON_ONCE(!ptr_id)) {
> +		/* references must be special pointer types that are checked
> +		 * against argument requirements for the release function. */
> +		verbose(env, "verifier internal error: can't locate refcounted arg\n");
> +		return -EFAULT;
> +	}
> +	for (i = 0; i <= vstate->curframe; i++)
> +		release_reg_references(env, vstate->frame[i], ptr_id);
> +
> +	return release_reference_state(env, ptr_id);
> +}
> +
>  static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			   int *insn_idx)
>  {
>  	struct bpf_verifier_state *state = env->cur_state;
>  	struct bpf_func_state *caller, *callee;
> -	int i, subprog, target_insn;
> +	int i, err, subprog, target_insn;
>  
>  	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
>  		verbose(env, "the call stack of %d frames is too deep\n",
> @@ -2338,6 +2540,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			state->curframe + 1 /* frameno within this callchain */,
>  			subprog /* subprog number within this prog */);
>  
> +	/* Transfer references to the callee */
> +	err = transfer_reference_state(callee, caller);
> +	if (err)
> +		return err;
> +
>  	/* copy r1 - r5 args that callee can access */
>  	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
>  		callee->regs[i] = caller->regs[i];
> @@ -2368,6 +2575,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
>  	struct bpf_verifier_state *state = env->cur_state;
>  	struct bpf_func_state *caller, *callee;
>  	struct bpf_reg_state *r0;
> +	int err;
>  
>  	callee = state->frame[state->curframe];
>  	r0 = &callee->regs[BPF_REG_0];
> @@ -2387,6 +2595,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
>  	/* return to the caller whatever r0 had in the callee */
>  	caller->regs[BPF_REG_0] = *r0;
>  
> +	/* Transfer references to the caller */
> +	err = transfer_reference_state(caller, callee);
> +	if (err)
> +		return err;
> +
>  	*insn_idx = callee->callsite + 1;
>  	if (env->log.level) {
>  		verbose(env, "returning from callee:\n");
> @@ -2498,6 +2711,15 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>  			return err;
>  	}
>  
> +	/* If the function is a release() function, mark all copies of the same
> +	 * pointer as "freed" in all registers and in the stack.
> +	 */
> +	if (is_release_function(func_id)) {
> +		err = release_reference(env);

I think this can be improved if check_func_arg() stores ptr_id into meta.
Then this loop
 for (i = BPF_REG_1; i < BPF_REG_6; i++) {
       if (reg_is_refcounted(&regs[i])) {
in release_reference() won't be needed.

Also the macros from the previous patch look ugly, but considering this patch
I guess it's justified. At least I don't see a better way of doing it.

^ permalink raw reply

* Re: [PATCH ghak81 RFC V2 3/5] audit: use inline function to get audit context
From: Richard Guy Briggs @ 2018-05-15  3:05 UTC (permalink / raw)
  To: Paul Moore
  Cc: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list, Eric Paris, Steve Grubb, Ingo Molnar, David Howells
In-Reply-To: <CAHC9VhQXr76FXPCe8vUWNwa2VshcPg1mZmMY7M30H1F6w30rFA@mail.gmail.com>

On 2018-05-14 17:44, Paul Moore wrote:
> On Sat, May 12, 2018 at 9:58 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > Recognizing that the audit context is an internal audit value, use an
> > access function to retrieve the audit context pointer for the task
> > rather than reaching directly into the task struct to get it.
> >
> > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > ---
> >  include/linux/audit.h                | 14 ++++++--
> >  include/net/xfrm.h                   |  2 +-
> >  kernel/audit.c                       |  6 ++--
> >  kernel/audit_watch.c                 |  2 +-
> >  kernel/auditsc.c                     | 64 +++++++++++++++++-------------------
> >  net/bridge/netfilter/ebtables.c      |  2 +-
> >  net/core/dev.c                       |  2 +-
> >  net/netfilter/x_tables.c             |  2 +-
> >  net/netlabel/netlabel_user.c         |  2 +-
> >  security/integrity/ima/ima_api.c     |  2 +-
> >  security/integrity/integrity_audit.c |  2 +-
> >  security/lsm_audit.c                 |  2 +-
> >  security/selinux/hooks.c             |  4 +--
> >  security/selinux/selinuxfs.c         |  6 ++--
> >  security/selinux/ss/services.c       | 12 +++----
> >  15 files changed, 64 insertions(+), 60 deletions(-)
> 
> Merged, but there was some fuzz due to the missing 1/5 patch and a
> handfull of checkpatch.pl fixes.  Please take a look at the commit in
> the audit/next branch and if anything looks awry please send a patch
> to fix it.

Some of that fuzz was due to the two patches (ghak46/47) that went
through the xelinux tree...  There will be a merge conflict.

Otherwise, looks ok.

> paul moore

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* Re: [PATCH net-next v3 0/3] sctp: Introduce sctp_flush_ctx
From: David Miller @ 2018-05-15  3:15 UTC (permalink / raw)
  To: marcelo.leitner; +Cc: netdev, linux-sctp, nhorman, lucien.xin, vyasevich
In-Reply-To: <cover.1526319083.git.marcelo.leitner@gmail.com>

From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 14 May 2018 14:35:17 -0300

> This struct will hold all the context used during the outq flush, so we
> don't have to pass lots of pointers all around.
> 
> Checked on x86_64, the compiler inlines all these functions and there is no
> derreference added because of the struct.
> 
> This patchset depends on 'sctp: refactor sctp_outq_flush'
> 
> Changes since v1:
> - updated to build on top of v2 of 'sctp: refactor sctp_outq_flush'
> 
> Changes since v2:
> - fixed a rebase issue which reverted a change in patch 2.
> - rebased on v3 of 'sctp: refactor sctp_outq_flush'

Also applied, thanks Marcelo.

^ permalink raw reply

* Re: [RFC bpf-next 07/11] bpf: Add helper to retrieve socket in BPF
From: Alexei Starovoitov @ 2018-05-15  3:16 UTC (permalink / raw)
  To: Joe Stringer; +Cc: Martin KaFai Lau, daniel, netdev, ast, john fastabend
In-Reply-To: <CAOftzPinEyoaaokgsssA809LQ571x_u8hhA1rXw0HjN8a5O-7w@mail.gmail.com>

On Fri, May 11, 2018 at 05:54:33PM -0700, Joe Stringer wrote:
> On 11 May 2018 at 14:41, Martin KaFai Lau <kafai@fb.com> wrote:
> > On Fri, May 11, 2018 at 02:08:01PM -0700, Joe Stringer wrote:
> >> On 10 May 2018 at 22:00, Martin KaFai Lau <kafai@fb.com> wrote:
> >> > On Wed, May 09, 2018 at 02:07:05PM -0700, Joe Stringer wrote:
> >> >> This patch adds a new BPF helper function, sk_lookup() which allows BPF
> >> >> programs to find out if there is a socket listening on this host, and
> >> >> returns a socket pointer which the BPF program can then access to
> >> >> determine, for instance, whether to forward or drop traffic. sk_lookup()
> >> >> takes a reference on the socket, so when a BPF program makes use of this
> >> >> function, it must subsequently pass the returned pointer into the newly
> >> >> added sk_release() to return the reference.
> >> >>
> >> >> By way of example, the following pseudocode would filter inbound
> >> >> connections at XDP if there is no corresponding service listening for
> >> >> the traffic:
> >> >>
> >> >>   struct bpf_sock_tuple tuple;
> >> >>   struct bpf_sock_ops *sk;
> >> >>
> >> >>   populate_tuple(ctx, &tuple); // Extract the 5tuple from the packet
> >> >>   sk = bpf_sk_lookup(ctx, &tuple, sizeof tuple, netns, 0);
> >> >>   if (!sk) {
> >> >>     // Couldn't find a socket listening for this traffic. Drop.
> >> >>     return TC_ACT_SHOT;
> >> >>   }
> >> >>   bpf_sk_release(sk, 0);
> >> >>   return TC_ACT_OK;
> >> >>
> >> >> Signed-off-by: Joe Stringer <joe@wand.net.nz>
> >> >> ---
> >>
> >> ...
> >>
> >> >> @@ -4032,6 +4036,96 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
> >> >>  };
> >> >>  #endif
> >> >>
> >> >> +struct sock *
> >> >> +sk_lookup(struct net *net, struct bpf_sock_tuple *tuple) {
> >> > Would it be possible to have another version that
> >> > returns a sk without taking its refcnt?
> >> > It may have performance benefit.
> >>
> >> Not really. The sockets are not RCU-protected, and established sockets
> >> may be torn down without notice. If we don't take a reference, there's
> >> no guarantee that the socket will continue to exist for the duration
> >> of running the BPF program.
> >>
> >> From what I follow, the comment below has a hidden implication which
> >> is that sockets without SOCK_RCU_FREE, eg established sockets, may be
> >> directly freed regardless of RCU.
> > Right, SOCK_RCU_FREE sk is the one I am concern about.
> > For example, TCP_LISTEN socket does not require taking a refcnt
> > now.  Doing a bpf_sk_lookup() may have a rather big
> > impact on handling TCP syn flood.  or the usual intention
> > is to redirect instead of passing it up to the stack?
> 
> I see, if you're only interested in listen sockets then probably this
> series could be extended with a new flag, eg something like
> BPF_F_SK_FIND_LISTENERS which restricts the set of possible sockets
> found to only listen sockets, then the implementation would call into
> __inet_lookup_listener() instead of inet_lookup(). The presence of
> that flag in the relevant register during CALL instruction would show
> that the verifier should not reference-track the result, then there'd
> need to be a check on the release to ensure that this unreferenced
> socket is never released. Just a thought, completely untested and I
> could still be missing some detail..
> 
> That said, I don't completely follow how you would expect to handle
> the traffic for sockets that are already established - the helper
> would no longer find those sockets, so you wouldn't know whether to
> pass the traffic up the stack for established traffic or not.

I think Martin has a valid concern here that if somebody starts using
this helper on the rx traffic the bpf program (via these two new
helpers) will be doing refcnt++ and refcnt-- even for listener
sockets which will cause synflood to suffer.
One can argue that this is bpf author mistake, but without fixes
(and api changes) to the helper the programmer doesn't really have a way
of avoiding this situation.
Also udp sockets don't need refcnt at all.
How about we split this single helper into three:
- bpf_sk_lookup_tcp_established() that will return refcnt-ed socket
and has to be bpf_sk_release()d by the program.
- bpf_sk_lookup_tcp_listener() that doesn't refcnt, since progs
run in rcu.
- bpf_sk_lookup_udp() that also doesn't refcnt.
The logic you want to put into this helper can be easily
replicated with these three helpers and the whole thing will
be much faster.
Thoughts?

^ permalink raw reply

* Re: [RFC bpf-next 11/11] Documentation: Describe bpf reference tracking
From: Alexei Starovoitov @ 2018-05-15  3:19 UTC (permalink / raw)
  To: Joe Stringer; +Cc: daniel, netdev, ast, john.fastabend, kafai
In-Reply-To: <20180509210709.7201-12-joe@wand.net.nz>

On Wed, May 09, 2018 at 02:07:09PM -0700, Joe Stringer wrote:
> Signed-off-by: Joe Stringer <joe@wand.net.nz>
> ---
>  Documentation/networking/filter.txt | 64 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 64 insertions(+)
> 
> diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
> index 5032e1263bc9..77be17977bc5 100644
> --- a/Documentation/networking/filter.txt
> +++ b/Documentation/networking/filter.txt
> @@ -1125,6 +1125,14 @@ pointer type.  The types of pointers describe their base, as follows:
>      PTR_TO_STACK        Frame pointer.
>      PTR_TO_PACKET       skb->data.
>      PTR_TO_PACKET_END   skb->data + headlen; arithmetic forbidden.
> +    PTR_TO_SOCKET       Pointer to struct bpf_sock_ops, implicitly refcounted.
> +    PTR_TO_SOCKET_OR_NULL
> +                        Either a pointer to a socket, or NULL; socket lookup
> +                        returns this type, which becomes a PTR_TO_SOCKET when
> +                        checked != NULL. PTR_TO_SOCKET is reference-counted,
> +                        so programs must release the reference through the
> +                        socket release function before the end of the program.
> +                        Arithmetic on these pointers is forbidden.
>  However, a pointer may be offset from this base (as a result of pointer
>  arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
>  offset'.  The former is used when an exactly-known value (e.g. an immediate
> @@ -1168,6 +1176,13 @@ over the Ethernet header, then reads IHL and addes (IHL * 4), the resulting
>  pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
>  bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
>  that pointer are safe.
> +The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common
> +to all copies of the pointer returned from a socket lookup. This has similar
> +behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but
> +it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly
> +represents a reference to the corresponding 'struct sock'. To ensure that the
> +reference is not leaked, it is imperative to NULL-check the reference and in
> +the non-NULL case, and pass the valid reference to the socket release function.
>  
>  Direct packet access
>  --------------------
> @@ -1441,6 +1456,55 @@ Error:
>    8: (7a) *(u64 *)(r0 +0) = 1
>    R0 invalid mem access 'imm'
>  
> +Program that performs a socket lookup then sets the pointer to NULL without
> +checking it:
> +value:
> +  BPF_MOV64_IMM(BPF_REG_2, 0),
> +  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
> +  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> +  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> +  BPF_MOV64_IMM(BPF_REG_3, 4),
> +  BPF_MOV64_IMM(BPF_REG_4, 0),
> +  BPF_MOV64_IMM(BPF_REG_5, 0),
> +  BPF_EMIT_CALL(BPF_FUNC_sk_lookup),
> +  BPF_MOV64_IMM(BPF_REG_0, 0),
> +  BPF_EXIT_INSN(),
> +Error:
> +  0: (b7) r2 = 0
> +  1: (63) *(u32 *)(r10 -8) = r2
> +  2: (bf) r2 = r10
> +  3: (07) r2 += -8
> +  4: (b7) r3 = 4
> +  5: (b7) r4 = 0
> +  6: (b7) r5 = 0
> +  7: (85) call bpf_sk_lookup#65
> +  8: (b7) r0 = 0
> +  9: (95) exit
> +  Unreleased reference id=1, alloc_insn=7
> +
> +Program that performs a socket lookup but does not NULL-check the returned
> +value:
> +  BPF_MOV64_IMM(BPF_REG_2, 0),
> +  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
> +  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> +  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> +  BPF_MOV64_IMM(BPF_REG_3, 4),
> +  BPF_MOV64_IMM(BPF_REG_4, 0),
> +  BPF_MOV64_IMM(BPF_REG_5, 0),
> +  BPF_EMIT_CALL(BPF_FUNC_sk_lookup),
> +  BPF_EXIT_INSN(),
> +Error:
> +  0: (b7) r2 = 0
> +  1: (63) *(u32 *)(r10 -8) = r2
> +  2: (bf) r2 = r10
> +  3: (07) r2 += -8
> +  4: (b7) r3 = 4
> +  5: (b7) r4 = 0
> +  6: (b7) r5 = 0
> +  7: (85) call bpf_sk_lookup#65
> +  8: (95) exit
> +  Unreleased reference id=1, alloc_insn=7

Nice. Thank you for updating this doc. We haven't touched it in long time.
It probably long overdue for complete overhaul.

^ permalink raw reply

* Re: [PATCH ghak81 RFC V2 3/5] audit: use inline function to get audit context
From: Richard Guy Briggs @ 2018-05-15  3:28 UTC (permalink / raw)
  To: Paul Moore
  Cc: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list, Eric Paris, Steve Grubb, Ingo Molnar, David Howells
In-Reply-To: <20180515030545.7oiyz33rzoj6sxs5@madcap2.tricolour.ca>

On 2018-05-14 23:05, Richard Guy Briggs wrote:
> On 2018-05-14 17:44, Paul Moore wrote:
> > On Sat, May 12, 2018 at 9:58 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > > Recognizing that the audit context is an internal audit value, use an
> > > access function to retrieve the audit context pointer for the task
> > > rather than reaching directly into the task struct to get it.
> > >
> > > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > > ---
> > >  include/linux/audit.h                | 14 ++++++--
> > >  include/net/xfrm.h                   |  2 +-
> > >  kernel/audit.c                       |  6 ++--
> > >  kernel/audit_watch.c                 |  2 +-
> > >  kernel/auditsc.c                     | 64 +++++++++++++++++-------------------
> > >  net/bridge/netfilter/ebtables.c      |  2 +-
> > >  net/core/dev.c                       |  2 +-
> > >  net/netfilter/x_tables.c             |  2 +-
> > >  net/netlabel/netlabel_user.c         |  2 +-
> > >  security/integrity/ima/ima_api.c     |  2 +-
> > >  security/integrity/integrity_audit.c |  2 +-
> > >  security/lsm_audit.c                 |  2 +-
> > >  security/selinux/hooks.c             |  4 +--
> > >  security/selinux/selinuxfs.c         |  6 ++--
> > >  security/selinux/ss/services.c       | 12 +++----
> > >  15 files changed, 64 insertions(+), 60 deletions(-)
> > 
> > Merged, but there was some fuzz due to the missing 1/5 patch and a
> > handfull of checkpatch.pl fixes.  Please take a look at the commit in
> > the audit/next branch and if anything looks awry please send a patch
> > to fix it.
> 
> Some of that fuzz was due to the two patches (ghak46/47) that went
> through the xelinux tree...  There will be a merge conflict.
> 
> Otherwise, looks ok.

Spoke too soon, missed one from the new seccomp actions_logged...

Patch pending...

> > paul moore
> 
> - RGB

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* Re: [PATCH] lib/rhashtable: reorder some inititalization sequences
From: Herbert Xu @ 2018-05-15  3:37 UTC (permalink / raw)
  To: David Miller; +Cc: dave, akpm, tgraf, netdev, linux-kernel, dbueso
In-Reply-To: <20180514.225213.1789810083198383905.davem@davemloft.net>

On Mon, May 14, 2018 at 10:52:13PM -0400, David Miller wrote:
> From: Davidlohr Bueso <dave@stgolabs.net>
> Date: Mon, 14 May 2018 08:13:32 -0700
> 
> > rhashtable_init() allocates memory at the very end of the
> > call, once everything is setup; with the exception of the
> > nelems parameter. However, unless the user is doing something
> > bogus with params for which -EINVAL is returned, memory
> > allocation is the only operation that can trigger the call
> > to fail.
> > 
> > Thus move bucket_table_alloc() up such that we fail back to
> > the caller asap, instead of doing useless checks. This is
> > safe as the the table allocation isn't using the halfly
> > setup 'ht' structure and bucket_table_alloc() call chain only
> > ends up using the ht->nulls_base member in INIT_RHT_NULLS_HEAD.
> > 
> > Also move the locking initialization down to the end.
> > 
> > Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> 
> The user potentially "doing something bogus" is why the most
> expensive part of the initialization (the memory allocation)
> is done after everything else is validated.
> 
> I think it's best to keep things as-is.

I agree.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC v2 bpf-next 8/9] bpf: Provide helper to do lookups in kernel FIB table
From: David Ahern @ 2018-05-15  3:46 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: netdev, borkmann, ast, davem, shm, roopa, brouer, toke,
	john.fastabend
In-Reply-To: <4729b693-20d7-dd9e-c48b-be8386ce9bed@gmail.com>

On 4/29/18 7:13 PM, David Ahern wrote:
> 
> The idea here is to fast pass packets that fit a supported profile and
> are to be forwarded. Everything else should continue up the stack as it
> has wider capabilities. The helper and XDP programs should make no
> assumptions on what the broader kernel and userspace might be monitoring
> or want to do with packets that can not be forwarded in the fast path.
> This is very similar to hardware forwarding when it punts packets to the
> CPU for control plane assistance.
> 

Thinking about this some more and how to return more information to the
bpf program about the FIB lookup.

bpf_fib_lookup struct is 64-bytes. It can not be expanded without
hurting performance. I could do another union on an input parameter and
return flags indicating why the returned index is 0. Something like this:

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 360a1168c353..75591522444c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2314,6 +2314,12 @@ struct bpf_raw_tracepoint_args {
 #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
 #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)

+#define BPF_FIB_LKUP_RET_NO_FWD      BIT(0)  /* pkt is not fwded */
+#define BPF_FIB_LKUP_RET_UNSUPP_LWT  BIT(1)  /* fwd requires unsupp
encap */
+#define BPF_FIB_LKUP_RET_NO_NHDEV    BIT(2)  /* nh device does not exist */
+#define BPF_FIB_LKUP_RET_NO_NEIGH    BIT(3)  /* no neigh entry for nh */
+#define BPF_FIB_LKUP_RET_FRAG_NEEDED BIT(4)  /* pkt too big to fwd */
+
 struct bpf_fib_lookup {
        /* input */
        __u8    family;   /* network family, AF_INET, AF_INET6, AF_MPLS */
@@ -2325,7 +2331,11 @@ struct bpf_fib_lookup {

        /* total length of packet from network header - used for MTU
check */
        __u16   tot_len;
-       __u32   ifindex;  /* L3 device index for lookup */
+
+       union {
+               __u32   ifindex;   /* in: L3 device index for lookup */
+               __u32   ret_flags; /* out: BPF_FIB_LOOKUP_RET flags */
+       }

        union {
                /* inputs to lookup */


Similarly for the fib result, it could be returned with a union on say
family:
    union {
        __u8 family;   /* in: network family, AF_INET, AF_INET6, AF_MPLS */
        __u8 rt_type;  /* out: FIB lookup route type */
    };

Then if the fib result is -EINVAL/-EHOSTUNREACH/-EACCES, rt_type is set
to RTN_BLACKHOLE/RTN_UNREACHABLE/RTN_PROHIBIT allowing the XDP program
to make an informed decision on dropping the packet.

To avoid performance hits on the forwarding path, these return values
would *only* set if the ifindex returned is 0.

^ permalink raw reply related

* mounting NFS on the same host leads to D state
From: maowenan @ 2018-05-15  3:47 UTC (permalink / raw)
  To: netdev@vger.kernel.org, jlayton, linux-nfs; +Cc: yangerkun, weiyongjun (A)

Hi,

Recently I have tested NFS and exportfs scenario,
that NFS server and client are in the same host.
And I found mounting NFS filesystm onto the same host
can lead to rpc.mountd and related task become D state.
My kernel version is based on 3.10, and I find 4.15 has the same
appearance.

My test step as below:
1)create dir.
mkdir -p /home/test1 /home/test2
2)share dir /home/test1
echo '/home/test1 localhost(rw,all_squash,anonuid=0,anongid=0)' > /etc/exports
3)exportfs
exportfs -vr || echo "Failed to export /home/test1"
4)mount NFS.
mount localhost:/home/test1 /home/test2 -o vers=3,soft
5)share dir /home/test2
echo '/home/test2 *(rw,all_squash,anonuid=0,anongid=0)' >> /etc/exports
6)exportfs
exportfs -vr
7) list /home/test2
ls /home/test2
then we found ls command is hung, ls and rpc.mountd became "D" state, and after
180 second ls command return.

Another scenario as below:
1)create dir.
mkdir -p /home/test3 /home/test4
2)share dir /home/test3
echo '/home/test3 localhost(rw,sync,no_wdelay,anonuid=0,anongid=0,no_subtree_check)' > /etc/exports
3)exportfs
exportfs -r
4)to see NFS status
showmount -e localhost
5)mount NFS
mount -t nfs4 -o proto=tcp,nolock,soft,timeo=50 localhost:/home/test3 /home/test4
6) stop nfs service,and  and check ls task state is D.
service nfs stop
ls /home/test4
ls command is hung and became D state.

I wonder to know is it reasonable about these test scenario because NFS server and
client are in the same host? Since some task went into D state, is there any reason about this?
and is there any patch to fix this issue?
Here is a link to talk about NFS mounting on the same host,  https://lwn.net/Articles/595652/

^ permalink raw reply

* Re: [PATCH iproute2] ip: do not drop capabilities if net_admin=i is set
From: Stephen Hemminger @ 2018-05-15  4:10 UTC (permalink / raw)
  To: Luca Boccassi; +Cc: netdev, dsahern, luto
In-Reply-To: <20180511123956.5638-1-bluca@debian.org>

On Fri, 11 May 2018 13:39:56 +0100
Luca Boccassi <bluca@debian.org> wrote:

> Users have reported a regression due to ip now dropping capabilities
> unconditionally.
> zerotier-one VPN and VirtualBox use ambient capabilities in their
> binary and then fork out to ip to set routes and links, and this
> does not work anymore.
> 
> As a workaround, do not drop caps if CAP_NET_ADMIN (the most common
> capability used by ip) is set with the INHERITABLE flag.
> Users that want ip vrf exec to work do not need to set INHERITABLE,
> which will then only set when the calling program had privileges to
> give itself the ambient capability.
> 
> Fixes: ba2fc55b99f8 ("Drop capabilities if not running ip exec vrf with libcap")
> 
> Signed-off-by: Luca Boccassi <bluca@debian.org>

Applied

^ permalink raw reply

* [PATCH net] tcp: purge write queue in tcp_connect_init()
From: Eric Dumazet @ 2018-05-15  4:14 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, Eric Dumazet, Eric Dumazet, Yuchung Cheng, Neal Cardwell

syzkaller found a reliable way to crash the host, hitting a BUG()
in __tcp_retransmit_skb()

Malicous MSG_FASTOPEN is the root cause. We need to purge write queue
in tcp_connect_init() at the point we init snd_una/write_seq.

This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE()

kernel BUG at net/ipv4/tcp_output.c:2837!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837
RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206
RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49
RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005
RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2
R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad
R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80
FS:  0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900
CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <IRQ>
 tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923
 tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488
 tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573
 tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593
 call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
 expire_timers kernel/time/timer.c:1363 [inline]
 __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
 invoke_softirq kernel/softirq.c:365 [inline]
 irq_exit+0x1d1/0x200 kernel/softirq.c:405
 exiting_irq arch/x86/include/asm/apic.h:525 [inline]
 smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863

Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
---
 net/ipv4/tcp_output.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383cac0ff0ec059ca7dbc1a6304cc7f8183e008d..d07e34f8e3091144976358674b92458076f92bfb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2833,8 +2833,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		return -EBUSY;
 
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
-		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-			BUG();
+		if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+			WARN_ON_ONCE(1);
+			return -EINVAL;
+		}
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -3342,6 +3344,7 @@ static void tcp_connect_init(struct sock *sk)
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->snd_wnd = 0;
 	tcp_init_wl(tp, 0);
+	tcp_write_queue_purge(sk);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* Re: [PATCH net-next 2/3] net: qualcomm: rmnet: Add support for ethtool private stats
From: kbuild test robot @ 2018-05-15  4:42 UTC (permalink / raw)
  To: Subash Abhinov Kasiviswanathan
  Cc: kbuild-all, davem, netdev, Subash Abhinov Kasiviswanathan
In-Reply-To: <1526328527-20026-3-git-send-email-subashab@codeaurora.org>

[-- Attachment #1: Type: text/plain, Size: 2578 bytes --]

Hi Subash,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Subash-Abhinov-Kasiviswanathan/net-qualcomm-rmnet-Updates-2018-05-14/20180515-115355
config: i386-allmodconfig (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c: In function 'rmnet_map_checksum_downlink_packet':
>> drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:382:2: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
     else
     ^~~~
   drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:384:3: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'else'
      return -EPROTONOSUPPORT;
      ^~~~~~

vim +/else +382 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c

   349	
   350	/* Validates packet checksums. Function takes a pointer to
   351	 * the beginning of a buffer which contains the IP payload +
   352	 * padding + checksum trailer.
   353	 * Only IPv4 and IPv6 are supported along with TCP & UDP.
   354	 * Fragmented or tunneled packets are not supported.
   355	 */
   356	int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
   357	{
   358		struct rmnet_priv *priv = netdev_priv(skb->dev);
   359		struct rmnet_map_dl_csum_trailer *csum_trailer;
   360	
   361		if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
   362			priv->stats.csum_sw++;
   363			return -EOPNOTSUPP;
   364		}
   365	
   366		csum_trailer = (struct rmnet_map_dl_csum_trailer *)(skb->data + len);
   367	
   368		if (!csum_trailer->valid) {
   369			priv->stats.csum_valid_unset++;
   370			return -EINVAL;
   371		}
   372	
   373		if (skb->protocol == htons(ETH_P_IP))
   374			return rmnet_map_ipv4_dl_csum_trailer(skb, csum_trailer, priv);
   375		else if (skb->protocol == htons(ETH_P_IPV6))
   376	#if IS_ENABLED(CONFIG_IPV6)
   377			return rmnet_map_ipv6_dl_csum_trailer(skb, csum_trailer, priv);
   378	#else
   379			priv->stats.csum_err_invalid_ip_version++;
   380			return -EPROTONOSUPPORT;
   381	#endif
 > 382		else
   383			priv->stats.csum_err_invalid_ip_version++;
   384			return -EPROTONOSUPPORT;
   385	
   386		return 0;
   387	}
   388	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 63034 bytes --]

^ permalink raw reply

* Re: [PATCH v1 1/4] media: rc: introduce BPF_PROG_IR_DECODER
From: Y Song @ 2018-05-15  4:48 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller
In-Reply-To: <32a944171d5c48abf126259595b0088ce3122c91.1526331777.git.sean@mess.org>

On Mon, May 14, 2018 at 2:10 PM, Sean Young <sean@mess.org> wrote:
> Add support for BPF_PROG_IR_DECODER. This type of BPF program can call
> rc_keydown() to reported decoded IR scancodes, or rc_repeat() to report
> that the last key should be repeated.
>
> Signed-off-by: Sean Young <sean@mess.org>
> ---
>  drivers/media/rc/Kconfig          |  8 +++
>  drivers/media/rc/Makefile         |  1 +
>  drivers/media/rc/ir-bpf-decoder.c | 93 +++++++++++++++++++++++++++++++
>  include/linux/bpf_types.h         |  3 +
>  include/uapi/linux/bpf.h          | 16 +++++-
>  5 files changed, 120 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/media/rc/ir-bpf-decoder.c
>
> diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
> index eb2c3b6eca7f..10ad6167d87c 100644
> --- a/drivers/media/rc/Kconfig
> +++ b/drivers/media/rc/Kconfig
> @@ -120,6 +120,14 @@ config IR_IMON_DECODER
>            remote control and you would like to use it with a raw IR
>            receiver, or if you wish to use an encoder to transmit this IR.
>
> +config IR_BPF_DECODER
> +       bool "Enable IR raw decoder using BPF"
> +       depends on BPF_SYSCALL
> +       depends on RC_CORE=y
> +       help
> +          Enable this option to make it possible to load custom IR
> +          decoders written in BPF.
> +
>  endif #RC_DECODERS
>
>  menuconfig RC_DEVICES
> diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile
> index 2e1c87066f6c..12e1118430d0 100644
> --- a/drivers/media/rc/Makefile
> +++ b/drivers/media/rc/Makefile
> @@ -5,6 +5,7 @@ obj-y += keymaps/
>  obj-$(CONFIG_RC_CORE) += rc-core.o
>  rc-core-y := rc-main.o rc-ir-raw.o
>  rc-core-$(CONFIG_LIRC) += lirc_dev.o
> +rc-core-$(CONFIG_IR_BPF_DECODER) += ir-bpf-decoder.o
>  obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o
>  obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o
>  obj-$(CONFIG_IR_RC6_DECODER) += ir-rc6-decoder.o
> diff --git a/drivers/media/rc/ir-bpf-decoder.c b/drivers/media/rc/ir-bpf-decoder.c
> new file mode 100644
> index 000000000000..aaa5e208b1a5
> --- /dev/null
> +++ b/drivers/media/rc/ir-bpf-decoder.c
> @@ -0,0 +1,93 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// ir-bpf-decoder.c - handles bpf decoders
> +//
> +// Copyright (C) 2018 Sean Young <sean@mess.org>
> +
> +#include <linux/bpf.h>
> +#include <linux/filter.h>
> +#include "rc-core-priv.h"
> +
> +/*
> + * BPF interface for raw IR decoder
> + */
> +const struct bpf_prog_ops ir_decoder_prog_ops = {
> +};
> +
> +BPF_CALL_1(bpf_rc_repeat, struct ir_raw_event*, event)
> +{
> +       struct ir_raw_event_ctrl *ctrl;
> +
> +       ctrl = container_of(event, struct ir_raw_event_ctrl, prev_ev);
> +
> +       rc_repeat(ctrl->dev);
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto rc_repeat_proto = {
> +       .func      = bpf_rc_repeat,
> +       .gpl_only  = true, // rc_repeat is EXPORT_SYMBOL_GPL
> +       .ret_type  = RET_VOID,

I suggest using RET_INTEGER here since we do return an integer 0.
RET_INTEGER will also make it easy to extend if you want to return
a non-zero value for error code or other reasons.

> +       .arg1_type = ARG_PTR_TO_CTX,
> +};
> +
> +BPF_CALL_4(bpf_rc_keydown, struct ir_raw_event*, event, u32, protocol,
> +          u32, scancode, u32, toggle)
> +{
> +       struct ir_raw_event_ctrl *ctrl;
> +
> +       ctrl = container_of(event, struct ir_raw_event_ctrl, prev_ev);
> +       rc_keydown(ctrl->dev, protocol, scancode, toggle != 0);
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto rc_keydown_proto = {
> +       .func      = bpf_rc_keydown,
> +       .gpl_only  = true, // rc_keydown is EXPORT_SYMBOL_GPL
> +       .ret_type  = RET_VOID,

ditto. RET_INTEGER is preferable.

> +       .arg1_type = ARG_PTR_TO_CTX,
> +       .arg2_type = ARG_ANYTHING,
> +       .arg3_type = ARG_ANYTHING,
> +       .arg4_type = ARG_ANYTHING,
> +};
> +
> +static const struct bpf_func_proto *ir_decoder_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +       switch (func_id) {
> +       case BPF_FUNC_rc_repeat:
> +               return &rc_repeat_proto;
> +       case BPF_FUNC_rc_keydown:
> +               return &rc_keydown_proto;
> +       case BPF_FUNC_map_lookup_elem:
> +               return &bpf_map_lookup_elem_proto;
> +       case BPF_FUNC_map_update_elem:
> +               return &bpf_map_update_elem_proto;
> +       case BPF_FUNC_map_delete_elem:
> +               return &bpf_map_delete_elem_proto;
> +       case BPF_FUNC_ktime_get_ns:
> +               return &bpf_ktime_get_ns_proto;
> +       case BPF_FUNC_tail_call:
> +               return &bpf_tail_call_proto;
> +       case BPF_FUNC_get_prandom_u32:
> +               return &bpf_get_prandom_u32_proto;
> +       default:
> +               return NULL;
> +       }
> +}
> +
> +static bool ir_decoder_is_valid_access(int off, int size,
> +                                      enum bpf_access_type type,
> +                                      const struct bpf_prog *prog,
> +                                      struct bpf_insn_access_aux *info)
> +{
> +       if (type == BPF_WRITE)
> +               return false;
> +       if (off < 0 || off + size > sizeof(struct ir_raw_event))
> +               return false;

You probably need more than just checking the boundary.
>From patch #3, the structure is:
 struct ir_raw_event {
        union {
                __u32           duration;
                __u32           carrier;
        };
        __u8                    duty_cycle;

        unsigned                pulse:1;
        unsigned                reset:1;
        unsigned                timeout:1;
       unsigned                carrier_report:1;
};

You would like the memory access to be aligned,
so accessing duration/carrier with 4-byte alignment, and
pulse/reset/timeout/carrier_report 4-byte alignment as well.

You could only allow __u32 access to duration/carrier.
But if you want bpf program to access duration/carrier with
code like (__u16)(ctx->duration), then the compiler may
translate the load to a 2-byte load. You may need to handle
endianness here. You can check net/core/filter.c function
bpf_skb_is_valid_access for some examples.

> +
> +       return true;
> +}
> +
> +const struct bpf_verifier_ops ir_decoder_verifier_ops = {
> +       .get_func_proto  = ir_decoder_func_proto,
> +       .is_valid_access = ir_decoder_is_valid_access
> +};
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 2b28fcf6f6ae..ee5355715ee0 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -25,6 +25,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
>  #ifdef CONFIG_CGROUP_BPF
>  BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
>  #endif
> +#ifdef CONFIG_IR_BPF_DECODER
> +BPF_PROG_TYPE(BPF_PROG_TYPE_RAWIR_DECODER, ir_decoder)
> +#endif
>
>  BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c5ec89732a8d..6ad053e831c0 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -137,6 +137,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_SK_MSG,
>         BPF_PROG_TYPE_RAW_TRACEPOINT,
>         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> +       BPF_PROG_TYPE_RAWIR_DECODER,
>  };
>
>  enum bpf_attach_type {
> @@ -755,6 +756,17 @@ union bpf_attr {
>   *     @addr: pointer to struct sockaddr to bind socket to
>   *     @addr_len: length of sockaddr structure
>   *     Return: 0 on success or negative error code
> + *
> + * int bpf_rc_keydown(ctx, protocol, scancode, toggle)
> + *     Report decoded scancode with toggle value
> + *     @ctx: pointer to ctx
> + *     @protocol: decoded protocol
> + *     @scancode: decoded scancode
> + *     @toggle: set to 1 if button was toggled, else 0
> + *
> + * int bpf_rc_repeat(ctx)
> + *     Repeat the last decoded scancode
> + *     @ctx: pointer to ctx

The comment format has changed dramatically for
documentation reason. Could you rebase your change
on top of bpf-next tree? You will need to rewrite the above
helper description so tools can generate proper documentation
for them.

>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -821,7 +833,9 @@ union bpf_attr {
>         FN(msg_apply_bytes),            \
>         FN(msg_cork_bytes),             \
>         FN(msg_pull_data),              \
> -       FN(bind),
> +       FN(bind),                       \
> +       FN(rc_repeat),                  \
> +       FN(rc_keydown),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> --
> 2.17.0
>

^ permalink raw reply

* Re: possible deadlock in sk_diag_fill
From: Dmitry Vyukov @ 2018-05-15  5:19 UTC (permalink / raw)
  To: Andrei Vagin; +Cc: syzbot, avagin, David Miller, LKML, netdev, syzkaller-bugs
In-Reply-To: <20180514180035.GA20189@outlook.office365.com>

On Mon, May 14, 2018 at 8:00 PM, Andrei Vagin <avagin@virtuozzo.com> wrote:
>> >> Hello,
>> >>
>> >> syzbot found the following crash on:
>> >>
>> >> HEAD commit:    c1c07416cdd4 Merge tag 'kbuild-fixes-v4.17' of git://git.k..
>> >> git tree:       upstream
>> >> console output: https://syzkaller.appspot.com/x/log.txt?x=12164c97800000
>> >> kernel config:  https://syzkaller.appspot.com/x/.config?x=5a1dc06635c10d27
>> >> dashboard link: https://syzkaller.appspot.com/bug?extid=c1872be62e587eae9669
>> >> compiler:       gcc (GCC) 8.0.1 20180413 (experimental)
>> >> userspace arch: i386
>> >>
>> >> Unfortunately, I don't have any reproducer for this crash yet.
>> >>
>> >> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>> >> Reported-by: syzbot+c1872be62e587eae9669@syzkaller.appspotmail.com
>> >>
>> >>
>> >> ======================================================
>> >> WARNING: possible circular locking dependency detected
>> >> 4.17.0-rc3+ #59 Not tainted
>> >> ------------------------------------------------------
>> >> syz-executor1/25282 is trying to acquire lock:
>> >> 000000004fddf743 (&(&u->lock)->rlock/1){+.+.}, at: sk_diag_dump_icons
>> >> net/unix/diag.c:82 [inline]
>> >> 000000004fddf743 (&(&u->lock)->rlock/1){+.+.}, at:
>> >> sk_diag_fill.isra.5+0xa43/0x10d0 net/unix/diag.c:144
>> >>
>> >> but task is already holding lock:
>> >> 00000000b6895645 (rlock-AF_UNIX){+.+.}, at: spin_lock
>> >> include/linux/spinlock.h:310 [inline]
>> >> 00000000b6895645 (rlock-AF_UNIX){+.+.}, at: sk_diag_dump_icons
>> >> net/unix/diag.c:64 [inline]
>> >> 00000000b6895645 (rlock-AF_UNIX){+.+.}, at: sk_diag_fill.isra.5+0x94e/0x10d0
>> >> net/unix/diag.c:144
>> >>
>> >> which lock already depends on the new lock.
>> >
>> > In the code, we have a comment which explains why it is safe to take this lock
>> >
>> > /*
>> >  * The state lock is outer for the same sk's
>> >  * queue lock. With the other's queue locked it's
>> >  * OK to lock the state.
>> >  */
>> > unix_state_lock_nested(req);
>> >
>> > It is a question how to explain this to lockdep.
>>
>> Do I understand it correctly that (&u->lock)->rlock associated with
>> AF_UNIX is locked under rlock-AF_UNIX, and then rlock-AF_UNIX is
>> locked under (&u->lock)->rlock associated with AF_NETLINK? If so, I
>> think we need to split (&u->lock)->rlock by family too, so that we
>> have u->lock-AF_UNIX and u->lock-AF_NETLINK.
>
> I think here is another problem. lockdep woried about
> sk->sk_receive_queue vs unix_sk(s)->lock.
>
> sk_diag_dump_icons() takes sk->sk_receive_queue and then
> unix_sk(s)->lock.
>
> unix_dgram_sendmsg takes unix_sk(sk)->lock and then sk->sk_receive_queue.
>
> sk_diag_dump_icons() takes locks for two different sockets, but
> unix_dgram_sendmsg() takes locks for one socket.
>
> sk_diag_dump_icons
>         if (sk->sk_state == TCP_LISTEN) {
>                 spin_lock(&sk->sk_receive_queue.lock);
>                 skb_queue_walk(&sk->sk_receive_queue, skb) {
>                         unix_state_lock_nested(req);
>                                 spin_lock_nested(&unix_sk(s)->lock,
>
>
> unix_dgram_sendmsg
>         unix_state_lock(other)
>                 spin_lock(&unix_sk(s)->lock)
>         skb_queue_tail(&other->sk_receive_queue, skb);
>                 spin_lock_irqsave(&list->lock, flags);


Do you mean the following?
There is socket 1 with state lock (S1) and queue lock (Q2), and socket
2 with state lock (S2) and queue lock (Q2). unix_dgram_sendmsg lock
S1->Q1. And sk_diag_dump_icons locks Q1->S2.
If yes, then this looks pretty much as deadlock. Consider that 2
unix_dgram_sendmsg in 2 different threads lock S1 and S2 respectively.
Now 2  sk_diag_dump_icons in 2 different threads lock Q1 and Q2
respectively. Now sk_diag_dump_icons want to lock S's, and
unix_dgram_sendmsg want to lock Q's. Nobody can proceed.

^ permalink raw reply

* Re: [PATCH RFC net-next 0/7] net/ipv6: Fix route append and replace use cases
From: Tobin C. Harding @ 2018-05-15  5:21 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, Thomas.Winter, idosch, sharpd, roopa
In-Reply-To: <20180515025112.16647-1-dsahern@gmail.com>

Hi David,

On Mon, May 14, 2018 at 07:51:05PM -0700, David Ahern wrote:
> This patch set fixes a few append and replace uses cases for IPv6 and
> adds test cases that codifies the expectations of how append and replace
> are expected to work.

Nood question: what commit does this apply on top of please.  I
attempted to apply the set on top of net-next

	commit (961423f9fcbc Merge branch 'sctp-Introduce-sctp_flush_ctx')

patch 4 and 5 have merge conflicts (I stopped at 5).

thanks,
Tobin.

^ permalink raw reply

* Re: [PATCH v1 2/4] media: bpf: allow raw IR decoder bpf programs to be used
From: Y Song @ 2018-05-15  5:22 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller
In-Reply-To: <cd3a5e27ef4122fab90daae2af6031982df77282.1526331777.git.sean@mess.org>

On Mon, May 14, 2018 at 2:10 PM, Sean Young <sean@mess.org> wrote:
> This implements attaching, detaching, querying and execution. The target
> fd has to be the /dev/lircN device.
>
> Signed-off-by: Sean Young <sean@mess.org>
> ---
>  drivers/media/rc/ir-bpf-decoder.c | 191 ++++++++++++++++++++++++++++++
>  drivers/media/rc/lirc_dev.c       |  30 +++++
>  drivers/media/rc/rc-core-priv.h   |  15 +++
>  drivers/media/rc/rc-ir-raw.c      |   5 +
>  include/uapi/linux/bpf.h          |   1 +
>  kernel/bpf/syscall.c              |   7 ++
>  6 files changed, 249 insertions(+)
>
> diff --git a/drivers/media/rc/ir-bpf-decoder.c b/drivers/media/rc/ir-bpf-decoder.c
> index aaa5e208b1a5..651590a14772 100644
> --- a/drivers/media/rc/ir-bpf-decoder.c
> +++ b/drivers/media/rc/ir-bpf-decoder.c
> @@ -91,3 +91,194 @@ const struct bpf_verifier_ops ir_decoder_verifier_ops = {
>         .get_func_proto  = ir_decoder_func_proto,
>         .is_valid_access = ir_decoder_is_valid_access
>  };
> +
> +#define BPF_MAX_PROGS 64
> +
> +int rc_dev_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog, u32 flags)

flags is not used in this function.

> +{
> +       struct ir_raw_event_ctrl *raw;
> +       struct bpf_prog_array __rcu *old_array;
> +       struct bpf_prog_array *new_array;
> +       int ret;
> +
> +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
> +               return -EINVAL;
> +
> +       ret = mutex_lock_interruptible(&rcdev->lock);
> +       if (ret)
> +               return ret;
> +
> +       raw = rcdev->raw;
> +
> +       if (raw->progs && bpf_prog_array_length(raw->progs) >= BPF_MAX_PROGS) {
> +               ret = -E2BIG;
> +               goto out;
> +       }
> +
> +       old_array = raw->progs;
> +       ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
> +       if (ret < 0)
> +               goto out;
> +
> +       rcu_assign_pointer(raw->progs, new_array);
> +       bpf_prog_array_free(old_array);
> +out:
> +       mutex_unlock(&rcdev->lock);
> +       return ret;
> +}
> +
> +int rc_dev_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog, u32 flags)

flags is not used in this function.

> +{
> +       struct ir_raw_event_ctrl *raw;
> +       struct bpf_prog_array __rcu *old_array;
> +       struct bpf_prog_array *new_array;
> +       int ret;
> +
> +       if (rcdev->driver_type != RC_DRIVER_IR_RAW)
> +               return -EINVAL;
> +
> +       ret = mutex_lock_interruptible(&rcdev->lock);
> +       if (ret)
> +               return ret;
> +
> +       raw = rcdev->raw;
> +
> +       old_array = raw->progs;
> +       ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
> +       if (ret < 0) {
> +               bpf_prog_array_delete_safe(old_array, prog);
> +       } else {
> +               rcu_assign_pointer(raw->progs, new_array);
> +               bpf_prog_array_free(old_array);
> +       }
> +
> +       bpf_prog_put(prog);
> +       mutex_unlock(&rcdev->lock);
> +       return 0;
> +}
> +
> +void rc_dev_bpf_run(struct rc_dev *rcdev)
> +{
> +       struct ir_raw_event_ctrl *raw = rcdev->raw;
> +
> +       if (raw->progs)
> +               BPF_PROG_RUN_ARRAY(raw->progs, &raw->prev_ev, BPF_PROG_RUN);
> +}
> +
> +void rc_dev_bpf_put(struct rc_dev *rcdev)
> +{
> +       struct bpf_prog_array *progs = rcdev->raw->progs;
> +       int i, size;
> +
> +       if (!progs)
> +               return;
> +
> +       size = bpf_prog_array_length(progs);
> +       for (i = 0; i < size; i++)
> +               bpf_prog_put(progs->progs[i]);
> +
> +       bpf_prog_array_free(rcdev->raw->progs);
> +}
> +
> +int rc_dev_prog_attach(const union bpf_attr *attr)
> +{
> +       struct bpf_prog *prog;
> +       struct rc_dev *rcdev;
> +       int ret;
> +
> +       if (attr->attach_flags & BPF_F_ALLOW_OVERRIDE)
> +               return -EINVAL;

Looks like you really did not use flags except here.
BPF_F_ALLOW_OVERRIDE is originally used for
cgroup type of attachment and the comment explicits
saying so.

In the query below, the flags value "0" is copied to userspace.

In your case, I think you can just disallow any value, i.g.,
attr->attach_flags must be 0, and then you further down
check that if the prog is already in the array, you just return an error.

> +
> +       prog = bpf_prog_get_type(attr->attach_bpf_fd,
> +                                BPF_PROG_TYPE_RAWIR_DECODER);
> +       if (IS_ERR(prog))
> +               return PTR_ERR(prog);
> +
> +       rcdev = rc_dev_get_from_fd(attr->target_fd);
> +       if (IS_ERR(rcdev)) {
> +               bpf_prog_put(prog);
> +               return PTR_ERR(rcdev);
> +       }
> +
> +       ret = rc_dev_bpf_attach(rcdev, prog, attr->attach_flags);
> +       if (ret)
> +               bpf_prog_put(prog);
> +
> +       put_device(&rcdev->dev);
> +
> +       return ret;
> +}
> +
> +int rc_dev_prog_detach(const union bpf_attr *attr)
> +{
> +       struct bpf_prog *prog;
> +       struct rc_dev *rcdev;
> +       int ret;
> +
> +       if (attr->attach_flags & BPF_F_ALLOW_OVERRIDE)
> +               return -EINVAL;
> +
> +       prog = bpf_prog_get_type(attr->attach_bpf_fd,
> +                                BPF_PROG_TYPE_RAWIR_DECODER);
> +       if (IS_ERR(prog))
> +               return PTR_ERR(prog);
> +
> +       rcdev = rc_dev_get_from_fd(attr->target_fd);
> +       if (IS_ERR(rcdev)) {
> +               bpf_prog_put(prog);
> +               return PTR_ERR(rcdev);
> +       }
> +
> +       ret = rc_dev_bpf_detach(rcdev, prog, attr->attach_flags);
> +
> +       bpf_prog_put(prog);
> +       put_device(&rcdev->dev);
> +
> +       return ret;
> +}
> +
> +int rc_dev_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
> +{
> +       __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> +       struct bpf_prog_array *progs;
> +       struct rc_dev *rcdev;
> +       u32 cnt, flags = 0;
> +       int ret;
> +
> +       if (attr->query.query_flags)
> +               return -EINVAL;
> +
> +       rcdev = rc_dev_get_from_fd(attr->query.target_fd);
> +       if (IS_ERR(rcdev))
> +               return PTR_ERR(rcdev);
> +
> +       if (rcdev->driver_type != RC_DRIVER_IR_RAW) {
> +               ret = -EINVAL;
> +               goto out;
> +       }
> +
> +       ret = mutex_lock_interruptible(&rcdev->lock);
> +       if (ret)
> +               goto out;
> +
> +       progs = rcdev->raw->progs;
> +       cnt = progs ? bpf_prog_array_length(progs) : 0;
> +
> +       if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) {
> +               ret = -EFAULT;
> +               goto out;
> +       }
> +       if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) {
> +               ret = -EFAULT;
> +               goto out;
> +       }
> +
> +       if (attr->query.prog_cnt != 0 && prog_ids && cnt)
> +               ret = bpf_prog_array_copy_to_user(progs, prog_ids, cnt);
> +
> +out:
> +       mutex_unlock(&rcdev->lock);
> +       put_device(&rcdev->dev);
> +
> +       return ret;
> +}
> diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
> index 24e9fbb80e81..65319f2ccc13 100644
> --- a/drivers/media/rc/lirc_dev.c
> +++ b/drivers/media/rc/lirc_dev.c
> @@ -20,6 +20,7 @@
>  #include <linux/module.h>
>  #include <linux/mutex.h>
>  #include <linux/device.h>
> +#include <linux/file.h>
>  #include <linux/idr.h>
>  #include <linux/poll.h>
>  #include <linux/sched.h>
> @@ -28,6 +29,8 @@
>  #include "rc-core-priv.h"
>  #include <uapi/linux/lirc.h>
>
> +#include <linux/bpf-rcdev.h>
> +
>  #define LIRCBUF_SIZE   256
>
>  static dev_t lirc_base_dev;
> @@ -816,4 +819,31 @@ void __exit lirc_dev_exit(void)
>         unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX);
>  }
>
> +struct rc_dev *rc_dev_get_from_fd(int fd)
> +{
> +       struct rc_dev *dev;
> +       struct file *f;
> +
> +       f = fget_raw(fd);
> +       if (!f)
> +               return ERR_PTR(-EBADF);
> +
> +       if (!S_ISCHR(f->f_inode->i_mode) ||
> +           imajor(f->f_inode) != MAJOR(lirc_base_dev)) {
> +               fput(f);
> +               return ERR_PTR(-EBADF);
> +       }
> +
> +       dev = container_of(f->f_inode->i_cdev, struct rc_dev, lirc_cdev);
> +       if (!dev->registered) {
> +               fput(f);
> +               return ERR_PTR(-ENODEV);
> +       }
> +
> +       get_device(&dev->dev);
> +       fput(f);
> +
> +       return dev;
> +}
> +
>  MODULE_ALIAS("lirc_dev");
> diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h
> index e0e6a17460f6..b6f24f369657 100644
> --- a/drivers/media/rc/rc-core-priv.h
> +++ b/drivers/media/rc/rc-core-priv.h
> @@ -57,6 +57,9 @@ struct ir_raw_event_ctrl {
>         /* raw decoder state follows */
>         struct ir_raw_event prev_ev;
>         struct ir_raw_event this_ev;
> +#ifdef CONFIG_IR_BPF_DECODER
> +       struct bpf_prog_array *progs;
> +#endif
>         struct nec_dec {
>                 int state;
>                 unsigned count;
> @@ -288,6 +291,7 @@ void ir_lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev);
>  void ir_lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc);
>  int ir_lirc_register(struct rc_dev *dev);
>  void ir_lirc_unregister(struct rc_dev *dev);
> +struct rc_dev *rc_dev_get_from_fd(int fd);
>  #else
>  static inline int lirc_dev_init(void) { return 0; }
>  static inline void lirc_dev_exit(void) {}
> @@ -299,4 +303,15 @@ static inline int ir_lirc_register(struct rc_dev *dev) { return 0; }
>  static inline void ir_lirc_unregister(struct rc_dev *dev) { }
>  #endif
>
> +/*
> + * bpf interface
> + */
> +#ifdef CONFIG_IR_BPF_DECODER
> +void rc_dev_bpf_put(struct rc_dev *dev);
> +void rc_dev_bpf_run(struct rc_dev *dev);
> +#else
> +void rc_dev_bpf_put(struct rc_dev *dev) {}
> +void rc_dev_bpf_run(struct rc_dev *dev) {}
> +#endif
> +
>  #endif /* _RC_CORE_PRIV */
> diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c
> index 374f83105a23..efddd9c44466 100644
> --- a/drivers/media/rc/rc-ir-raw.c
> +++ b/drivers/media/rc/rc-ir-raw.c
> @@ -8,6 +8,8 @@
>  #include <linux/mutex.h>
>  #include <linux/kmod.h>
>  #include <linux/sched.h>
> +#include <linux/filter.h>
> +#include <linux/bpf.h>
>  #include "rc-core-priv.h"
>
>  /* Used to keep track of IR raw clients, protected by ir_raw_handler_lock */
> @@ -33,6 +35,7 @@ static int ir_raw_event_thread(void *data)
>                                         handler->decode(raw->dev, ev);
>                         ir_lirc_raw_event(raw->dev, ev);
>                         raw->prev_ev = ev;
> +                       rc_dev_bpf_run(raw->dev);
>                 }
>                 mutex_unlock(&ir_raw_handler_lock);
>
> @@ -623,6 +626,8 @@ void ir_raw_event_unregister(struct rc_dev *dev)
>                         handler->raw_unregister(dev);
>         mutex_unlock(&ir_raw_handler_lock);
>
> +       rc_dev_bpf_put(dev);
> +
>         ir_raw_event_free(dev);
>  }
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6ad053e831c0..d9740599adf6 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -155,6 +155,7 @@ enum bpf_attach_type {
>         BPF_CGROUP_INET6_CONNECT,
>         BPF_CGROUP_INET4_POST_BIND,
>         BPF_CGROUP_INET6_POST_BIND,
> +       BPF_RAWIR_DECODER,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 016ef9025827..63ecc1f2e1e3 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -27,6 +27,7 @@
>  #include <linux/timekeeping.h>
>  #include <linux/ctype.h>
>  #include <linux/nospec.h>
> +#include <linux/bpf-rcdev.h>
>
>  #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
>                            (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
> @@ -1556,6 +1557,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
>         case BPF_SK_SKB_STREAM_PARSER:
>         case BPF_SK_SKB_STREAM_VERDICT:
>                 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
> +       case BPF_RAWIR_DECODER:
> +               return rc_dev_prog_attach(attr);
>         default:
>                 return -EINVAL;
>         }
> @@ -1626,6 +1629,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
>         case BPF_SK_SKB_STREAM_PARSER:
>         case BPF_SK_SKB_STREAM_VERDICT:
>                 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
> +       case BPF_RAWIR_DECODER:
> +               return rc_dev_prog_detach(attr);
>         default:
>                 return -EINVAL;
>         }
> @@ -1673,6 +1678,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
>         case BPF_CGROUP_SOCK_OPS:
>         case BPF_CGROUP_DEVICE:
>                 break;
> +       case BPF_RAWIR_DECODER:
> +               return rc_dev_prog_query(attr, uattr);
>         default:
>                 return -EINVAL;
>         }
> --
> 2.17.0
>

^ permalink raw reply

* Re: [PATCH v1 3/4] media: rc bpf: move ir_raw_event to uapi
From: Y Song @ 2018-05-15  5:26 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller
In-Reply-To: <6ecdbd01b8c42c8784f2235c1e5109dac3dd86a5.1526331777.git.sean@mess.org>

On Mon, May 14, 2018 at 2:11 PM, Sean Young <sean@mess.org> wrote:
> The context provided to a BPF_PROG_RAWIR_DECODER is a struct ir_raw_event;
> ensure user space has a a definition.
>
> Signed-off-by: Sean Young <sean@mess.org>
> ---
>  include/media/rc-core.h        | 19 +------------------
>  include/uapi/linux/bpf_rcdev.h | 24 ++++++++++++++++++++++++

Patch #2 already referenced this file. So if Patches #1 and #2
applied, there will be
a compilation error. Could you re-arrange your patchset so that after
sequentially
applying each patch, there is no compilation error?

>  2 files changed, 25 insertions(+), 18 deletions(-)
>  create mode 100644 include/uapi/linux/bpf_rcdev.h
>
> diff --git a/include/media/rc-core.h b/include/media/rc-core.h
> index 6742fd86ff65..5d31e31d8ade 100644
> --- a/include/media/rc-core.h
> +++ b/include/media/rc-core.h
> @@ -21,6 +21,7 @@
>  #include <linux/kfifo.h>
>  #include <linux/time.h>
>  #include <linux/timer.h>
> +#include <uapi/linux/bpf_rcdev.h>
>  #include <media/rc-map.h>
>
>  /**
> @@ -299,24 +300,6 @@ void rc_keydown_notimeout(struct rc_dev *dev, enum rc_proto protocol,
>  void rc_keyup(struct rc_dev *dev);
>  u32 rc_g_keycode_from_table(struct rc_dev *dev, u32 scancode);
>
> -/*
> - * From rc-raw.c
> - * The Raw interface is specific to InfraRed. It may be a good idea to
> - * split it later into a separate header.
> - */
> -struct ir_raw_event {
> -       union {
> -               u32             duration;
> -               u32             carrier;
> -       };
> -       u8                      duty_cycle;
> -
> -       unsigned                pulse:1;
> -       unsigned                reset:1;
> -       unsigned                timeout:1;
> -       unsigned                carrier_report:1;
> -};
> -
>  #define DEFINE_IR_RAW_EVENT(event) struct ir_raw_event event = {}
>
>  static inline void init_ir_raw_event(struct ir_raw_event *ev)
> diff --git a/include/uapi/linux/bpf_rcdev.h b/include/uapi/linux/bpf_rcdev.h
> new file mode 100644
> index 000000000000..d8629ff2b960
> --- /dev/null
> +++ b/include/uapi/linux/bpf_rcdev.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/* Copyright (c) 2018 Sean Young <sean@mess.org>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#ifndef _UAPI__LINUX_BPF_RCDEV_H__
> +#define _UAPI__LINUX_BPF_RCDEV_H__
> +
> +struct ir_raw_event {
> +       union {
> +               __u32           duration;
> +               __u32           carrier;
> +       };
> +       __u8                    duty_cycle;
> +
> +       unsigned                pulse:1;
> +       unsigned                reset:1;
> +       unsigned                timeout:1;
> +       unsigned                carrier_report:1;
> +};
> +
> +#endif /* _UAPI__LINUX_BPF_RCDEV_H__ */
> --
> 2.17.0
>

^ permalink raw reply

* Re: [PATCH net-next] sched: cls: enable verbose logging
From: Cong Wang @ 2018-05-15  5:31 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: Linux Kernel Network Developers, Jakub Kicinski, David Ahern,
	Stephen Hemminger, Jiri Pirko, Alexander Aring, Jamal Hadi Salim
In-Reply-To: <20180514204725.GT4977@localhost.localdomain>

On Mon, May 14, 2018 at 1:47 PM, Marcelo Ricardo Leitner
<marcelo.leitner@gmail.com> wrote:
> On Mon, May 14, 2018 at 01:30:53PM -0700, Cong Wang wrote:
>> On Sun, May 13, 2018 at 1:44 PM, Marcelo Ricardo Leitner
>> <marcelo.leitner@gmail.com> wrote:
>> > Currently, when the rule is not to be exclusively executed by the
>> > hardware, extack is not passed along and offloading failures don't
>> > get logged. The idea was that hardware failures are okay because the
>> > rule will get executed in software then and this way it doesn't confuse
>> > unware users.
>> >
>> > But this is not helpful in case one needs to understand why a certain
>> > rule failed to get offloaded. Considering it may have been a temporary
>> > failure, like resources exceeded or so, reproducing it later and knowing
>> > that it is triggering the same reason may be challenging.
>>
>> I fail to understand why you need a flag here, IOW, why not just pass
>> extack unconditionally?
>
> Because (as discussed in the RFC[1], should have linked it here) it
> could confuse users that are not aware of offloading and, in other
> cases, it can be just noise (like it would be right now for ebpf,
> which is mostly used in sw-path).
>
> 1.https://www.mail-archive.com/netdev@vger.kernel.org/msg223016.html

My point is that a TC filter flag should be used for a filter attribute,
logging is apparently not a part of filter. At least, put it into HW offloading,
not in TC filter.

I know DaveM hates module parameters, but a module parameter here
is more suitable than a TC filter flag.

^ permalink raw reply

* Re: [PATCH net-next] erspan: set bso bit based on mirrored packet's len
From: Tobin C. Harding @ 2018-05-15  5:33 UTC (permalink / raw)
  To: William Tu; +Cc: netdev
In-Reply-To: <1526342076-43714-1-git-send-email-u9012063@gmail.com>

On Mon, May 14, 2018 at 04:54:36PM -0700, William Tu wrote:
> Before the patch, the erspan BSO bit (Bad/Short/Oversized) is not
> handled.  BSO has 4 possible values:
>   00 --> Good frame with no error, or unknown integrity
>   11 --> Payload is a Bad Frame with CRC or Alignment Error
>   01 --> Payload is a Short Frame
>   10 --> Payload is an Oversized Frame
> 
> Based the short/oversized definitions in RFC1757, the patch sets
> the bso bit based on the mirrored packet's size.
> 
> Reported-by: Xiaoyan Jin <xiaoyanj@vmware.com>
> Signed-off-by: William Tu <u9012063@gmail.com>
> ---
>  include/net/erspan.h | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/include/net/erspan.h b/include/net/erspan.h
> index d044aa60cc76..5eb95f78ad45 100644
> --- a/include/net/erspan.h
> +++ b/include/net/erspan.h
> @@ -219,6 +219,30 @@ static inline __be32 erspan_get_timestamp(void)
>  	return htonl((u32)h_usecs);
>  }
>  
> +/* ERSPAN BSO (Bad/Short/Oversized)
> + *   00b --> Good frame with no error, or unknown integrity
> + *   01b --> Payload is a Short Frame
> + *   10b --> Payload is an Oversized Frame
> + *   11b --> Payload is a Bad Frame with CRC or Alignment Error
> + */
> +enum erspan_bso {
> +	BSO_NOERROR,
> +	BSO_SHORT,
> +	BSO_OVERSIZED,
> +	BSO_BAD,
> +};

If we are relying on the values perhaps this would be clearer

	BSO_NOERROR	= 0x00,
	BSO_SHORT 	= 0x01,
	BSO_OVERSIZED 	= 0x02,
	BSO_BAD 	= 0x03,

> +
> +static inline u8 erspan_detect_bso(struct sk_buff *skb)
> +{
> +	if (skb->len < ETH_ZLEN)
> +		return BSO_SHORT;
> +
> +	if (skb->len > ETH_FRAME_LEN)
> +		return BSO_OVERSIZED;
> +
> +	return BSO_NOERROR;
> +}

Without having much contextual knowledge around this patch; should we be
doing some check on CRC or alignment (at some stage)?  Having BSO_BAD
seems to imply so? 


Hope this helps,
Tobin.

^ permalink raw reply

* Re: [PATCH v1 4/4] samples/bpf: an example of a raw IR decoder
From: Y Song @ 2018-05-15  5:34 UTC (permalink / raw)
  To: Sean Young
  Cc: linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller
In-Reply-To: <1c8efa5dca5a8b97b81946b38044f826d12b8f50.1526331777.git.sean@mess.org>

On Mon, May 14, 2018 at 2:11 PM, Sean Young <sean@mess.org> wrote:
> This implements the grundig-16 IR protocol.
>
> Signed-off-by: Sean Young <sean@mess.org>
> ---
>  samples/bpf/Makefile                      |   4 +
>  samples/bpf/bpf_load.c                    |   9 +-
>  samples/bpf/grundig_decoder_kern.c        | 112 ++++++++++++++++++++++
>  samples/bpf/grundig_decoder_user.c        |  54 +++++++++++
>  tools/bpf/bpftool/prog.c                  |   1 +
>  tools/include/uapi/linux/bpf.h            |  17 +++-
>  tools/testing/selftests/bpf/bpf_helpers.h |   6 ++
>  7 files changed, 200 insertions(+), 3 deletions(-)
>  create mode 100644 samples/bpf/grundig_decoder_kern.c
>  create mode 100644 samples/bpf/grundig_decoder_user.c
>
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 4d6a6edd4bf6..c6fa111f103a 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor
>  hostprogs-y += xdp_rxq_info
>  hostprogs-y += syscall_tp
>  hostprogs-y += cpustat
> +hostprogs-y += grundig_decoder
>
>  # Libbpf dependencies
>  LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
> @@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
>  xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
>  syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
>  cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
> +grundig_decoder-objs := bpf_load.o $(LIBBPF) grundig_decoder_user.o
>
>  # Tell kbuild to always build the programs
>  always := $(hostprogs-y)
> @@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o
>  always += xdp2skb_meta_kern.o
>  always += syscall_tp_kern.o
>  always += cpustat_kern.o
> +always += grundig_decoder_kern.o
>
>  HOSTCFLAGS += -I$(objtree)/usr/include
>  HOSTCFLAGS += -I$(srctree)/tools/lib/
> @@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf
>  HOSTLOADLIBES_xdp_rxq_info += -lelf
>  HOSTLOADLIBES_syscall_tp += -lelf
>  HOSTLOADLIBES_cpustat += -lelf
> +HOSTLOADLIBES_grundig_decoder += -lelf
>
>  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
>  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
> diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
> index bebe4188b4b3..0fd389e95bb9 100644
> --- a/samples/bpf/bpf_load.c
> +++ b/samples/bpf/bpf_load.c
> @@ -69,6 +69,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
>         bool is_sockops = strncmp(event, "sockops", 7) == 0;
>         bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
>         bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
> +       bool is_ir_decoder = strncmp(event, "ir_decoder", 10) == 0;
>         size_t insns_cnt = size / sizeof(struct bpf_insn);
>         enum bpf_prog_type prog_type;
>         char buf[256];
> @@ -102,6 +103,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
>                 prog_type = BPF_PROG_TYPE_SK_SKB;
>         } else if (is_sk_msg) {
>                 prog_type = BPF_PROG_TYPE_SK_MSG;
> +       } else if (is_ir_decoder) {
> +               prog_type = BPF_PROG_TYPE_RAWIR_DECODER;
>         } else {
>                 printf("Unknown event '%s'\n", event);
>                 return -1;
> @@ -116,7 +119,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
>
>         prog_fd[prog_cnt++] = fd;
>
> -       if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
> +       if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk ||
> +           is_ir_decoder)
>                 return 0;
>
>         if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
> @@ -607,7 +611,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
>                     memcmp(shname, "cgroup/", 7) == 0 ||
>                     memcmp(shname, "sockops", 7) == 0 ||
>                     memcmp(shname, "sk_skb", 6) == 0 ||
> -                   memcmp(shname, "sk_msg", 6) == 0) {
> +                   memcmp(shname, "sk_msg", 6) == 0 ||
> +                   memcmp(shname, "ir_decoder", 10) == 0) {
>                         ret = load_and_attach(shname, data->d_buf,
>                                               data->d_size);
>                         if (ret != 0)
> diff --git a/samples/bpf/grundig_decoder_kern.c b/samples/bpf/grundig_decoder_kern.c
> new file mode 100644
> index 000000000000..c80f2c9cc69a
> --- /dev/null
> +++ b/samples/bpf/grundig_decoder_kern.c
> @@ -0,0 +1,112 @@
> +
> +#include <uapi/linux/bpf.h>
> +#include <uapi/linux/bpf_rcdev.h>
> +#include "bpf_helpers.h"
> +#include <linux/version.h>
> +
> +enum grundig_state {
> +       STATE_INACTIVE,
> +       STATE_HEADER_SPACE,
> +       STATE_LEADING_PULSE,
> +       STATE_BITS_SPACE,
> +       STATE_BITS_PULSE,
> +};
> +
> +struct decoder_state {
> +       u32 bits;
> +       enum grundig_state state;
> +       u32 count;
> +       u32 last_space;
> +};
> +
> +struct bpf_map_def SEC("maps") decoder_state_map = {
> +       .type = BPF_MAP_TYPE_ARRAY,
> +       .key_size = sizeof(u32),
> +       .value_size = sizeof(struct decoder_state),
> +       .max_entries = 1,
> +};
> +
> +#define US_TO_NS(t) 1000*(t)
> +static inline bool eq_margin(unsigned d1, unsigned d2, unsigned margin)
> +{
> +       return ((d1 > (d2 - margin)) && (d1 < (d2 + margin)));
> +}
> +
> +SEC("ir_decoder/grundig_decoder")
> +int bpf_decoder(struct ir_raw_event *raw)
> +{
> +       u32 key = 0;
> +       struct decoder_state init = {};
> +
> +       struct decoder_state *s = bpf_map_lookup_elem(&decoder_state_map, &key);
> +
> +       if (!s)
> +               s = &init;
> +
> +       if (raw->carrier_report) {
> +               // ignore
> +       } else if (raw->reset) {
> +               s->state = STATE_INACTIVE;
> +       } else if (s->state == STATE_INACTIVE) {
> +               if (raw->pulse && eq_margin(US_TO_NS(900), raw->duration, US_TO_NS(100))) {
> +                       s->bits = 0;
> +                       s->state = STATE_HEADER_SPACE;
> +                       s->count = 0;
> +               }
> +       } else if (s->state == STATE_HEADER_SPACE) {
> +               if (!raw->pulse && eq_margin(US_TO_NS(2900), raw->duration, US_TO_NS(200)))
> +                       s->state = STATE_LEADING_PULSE;
> +               else
> +                       s->state = STATE_INACTIVE;
> +       } else if (s->state == STATE_LEADING_PULSE) {
> +               if (raw->pulse && eq_margin(US_TO_NS(1300), raw->duration, US_TO_NS(100)))
> +                       s->state = STATE_BITS_SPACE;
> +               else
> +                       s->state = STATE_INACTIVE;
> +       } else if (s->state == STATE_BITS_SPACE) {
> +               s->last_space = raw->duration;
> +               s->state = STATE_BITS_PULSE;
> +       } else if (s->state == STATE_BITS_PULSE) {
> +               int t = -1;
> +               if (eq_margin(s->last_space, US_TO_NS(472), US_TO_NS(150)) &&
> +                   eq_margin(raw->duration, US_TO_NS(583), US_TO_NS(150)))
> +                       t = 0;
> +               if (eq_margin(s->last_space, US_TO_NS(1139), US_TO_NS(150)) &&
> +                   eq_margin(raw->duration, US_TO_NS(583), US_TO_NS(150)))
> +                       t = 1;
> +               if (eq_margin(s->last_space, US_TO_NS(1806), US_TO_NS(150)) &&
> +                   eq_margin(raw->duration, US_TO_NS(583), US_TO_NS(150)))
> +                       t = 2;
> +               if (eq_margin(s->last_space, US_TO_NS(2200), US_TO_NS(150)) &&
> +                   eq_margin(raw->duration, US_TO_NS(1139), US_TO_NS(150)))
> +                       t = 3;
> +               if (t < 0) {
> +                       s->state = STATE_INACTIVE;
> +               } else {
> +                       s->bits <<= 2;
> +                       switch (t) {
> +                       case 3: s->bits |= 0; break;
> +                       case 2: s->bits |= 3; break;
> +                       case 1: s->bits |= 2; break;
> +                       case 0: s->bits |= 1; break;
> +                       }
> +                       s->count += 2;
> +                       if (s->count == 16) {
> +                               bpf_rc_keydown(raw, 0x40, s->bits, 0);
> +                               s->state = STATE_INACTIVE;
> +                       } else {
> +                               s->state = STATE_BITS_SPACE;
> +                       }
> +               }
> +       }
> +
> +       if (s == &init)
> +               bpf_map_update_elem(&decoder_state_map, &key, s, BPF_NOEXIST);
> +
> +       return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> +
> +u32 _version SEC("version") = LINUX_VERSION_CODE;
> +
> diff --git a/samples/bpf/grundig_decoder_user.c b/samples/bpf/grundig_decoder_user.c
> new file mode 100644
> index 000000000000..61e8ee5f73ee
> --- /dev/null
> +++ b/samples/bpf/grundig_decoder_user.c
> @@ -0,0 +1,54 @@
> +
> +#include <linux/bpf.h>
> +#include <assert.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <libgen.h>
> +#include <sys/resource.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +
> +#include "bpf_load.h"
> +#include "bpf_util.h"
> +#include "libbpf.h"
> +
> +int main(int argc, char **argv)
> +{
> +       char filename[256];
> +       int ret, lircfd;
> +
> +       if (argc != 2) {
> +               printf("Usage: %s /dev/lircN\n", argv[0]);

Looks like the test requires /dev/lircN device. Is there any easy way
to test it?

Also, looks like the program does not depend on any kernel headers,
maybe it can be
moved to tools/testing/selftests/bpf/? There are testbot to run those
tests regularly.

> +               return 2;
> +       }
> +
> +       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
> +
> +       if (load_bpf_file(filename)) {
> +               printf("%s", bpf_log_buf);
> +               return 1;
> +       }
> +
> +       lircfd = open(argv[1], O_RDWR);
> +       if (lircfd == -1) {
> +               printf("failed to open lirc device %s: %m\n", argv[1]);
> +               return 1;
> +       }
> +
> +       ret = bpf_prog_attach(prog_fd[0], lircfd, BPF_RAWIR_DECODER, 0);
> +       if (ret) {
> +               printf("Failed to attach bpf to lirc device: %m\n");
> +               return 1;
> +       }
> +
> +       printf("Grundig IR decoder loaded and attached. Hit any key to stop\n");
> +       getchar();
> +
> +       return 0;
> +}
> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
> index f7a810897eac..ae1c26df212d 100644
> --- a/tools/bpf/bpftool/prog.c
> +++ b/tools/bpf/bpftool/prog.c
> @@ -68,6 +68,7 @@ static const char * const prog_type_name[] = {
>         [BPF_PROG_TYPE_SOCK_OPS]        = "sock_ops",
>         [BPF_PROG_TYPE_SK_SKB]          = "sk_skb",
>         [BPF_PROG_TYPE_CGROUP_DEVICE]   = "cgroup_device",
> +       [BPF_PROG_TYPE_RAWIR_DECODER]   = "ir_decoder",
>  };
>
>  static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index c5ec89732a8d..d9740599adf6 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -137,6 +137,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_SK_MSG,
>         BPF_PROG_TYPE_RAW_TRACEPOINT,
>         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> +       BPF_PROG_TYPE_RAWIR_DECODER,
>  };
>
>  enum bpf_attach_type {
> @@ -154,6 +155,7 @@ enum bpf_attach_type {
>         BPF_CGROUP_INET6_CONNECT,
>         BPF_CGROUP_INET4_POST_BIND,
>         BPF_CGROUP_INET6_POST_BIND,
> +       BPF_RAWIR_DECODER,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> @@ -755,6 +757,17 @@ union bpf_attr {
>   *     @addr: pointer to struct sockaddr to bind socket to
>   *     @addr_len: length of sockaddr structure
>   *     Return: 0 on success or negative error code
> + *
> + * int bpf_rc_keydown(ctx, protocol, scancode, toggle)
> + *     Report decoded scancode with toggle value
> + *     @ctx: pointer to ctx
> + *     @protocol: decoded protocol
> + *     @scancode: decoded scancode
> + *     @toggle: set to 1 if button was toggled, else 0
> + *
> + * int bpf_rc_repeat(ctx)
> + *     Repeat the last decoded scancode
> + *     @ctx: pointer to ctx
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -821,7 +834,9 @@ union bpf_attr {
>         FN(msg_apply_bytes),            \
>         FN(msg_cork_bytes),             \
>         FN(msg_pull_data),              \
> -       FN(bind),
> +       FN(bind),                       \
> +       FN(rc_repeat),                  \
> +       FN(rc_keydown),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
> index d8223d99f96d..4bf23d3dfc33 100644
> --- a/tools/testing/selftests/bpf/bpf_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_helpers.h
> @@ -96,6 +96,12 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
>         (void *) BPF_FUNC_msg_pull_data;
>  static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
>         (void *) BPF_FUNC_bind;
> +static int (*bpf_rc_repeat)(void *ctx) =
> +       (void *) BPF_FUNC_rc_repeat;
> +static int (*bpf_rc_keydown)(void *ctx, unsigned protocol, unsigned scancode,
> +                            unsigned toggle) =
> +       (void *) BPF_FUNC_rc_keydown;
> +
>
>  /* llvm builtin functions that eBPF C program may use to
>   * emit BPF_LD_ABS and BPF_LD_IND instructions
> --
> 2.17.0
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox