Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next 6/9] selftests/bpf: abstract away test log output
From: Andrii Nakryiko @ 2019-07-27 19:01 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, sdf
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190727190150.649137-1-andriin@fb.com>

This patch changes how test output is printed out. By default, if test
had no errors, the only output will be a single line with test number,
name, and verdict at the end, e.g.:

  #31 xdp:OK

If test had any errors, all log output captured during test execution
will be output after test completes.

It's possible to force output of log with `-v` (`--verbose`) option, in
which case output won't be buffered and will be output immediately.

To support this, individual tests are required to use helper methods for
logging: `test__printf()` and `test__vprintf()`.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_obj_id.c     |   6 +-
 .../bpf/prog_tests/bpf_verif_scale.c          |  31 ++--
 .../bpf/prog_tests/get_stack_raw_tp.c         |   4 +-
 .../selftests/bpf/prog_tests/l4lb_all.c       |   2 +-
 .../selftests/bpf/prog_tests/map_lock.c       |  10 +-
 .../selftests/bpf/prog_tests/send_signal.c    |   8 +-
 .../selftests/bpf/prog_tests/spinlock.c       |   2 +-
 .../bpf/prog_tests/stacktrace_build_id.c      |   4 +-
 .../bpf/prog_tests/stacktrace_build_id_nmi.c  |   4 +-
 .../selftests/bpf/prog_tests/xdp_noinline.c   |   3 +-
 tools/testing/selftests/bpf/test_progs.c      | 135 +++++++++++++-----
 tools/testing/selftests/bpf/test_progs.h      |  37 ++++-
 12 files changed, 173 insertions(+), 73 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index cb827383db4d..fb5840a62548 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -106,8 +106,8 @@ void test_bpf_obj_id(void)
 		if (CHECK(err ||
 			  prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
 			  info_len != sizeof(struct bpf_prog_info) ||
-			  (jit_enabled && !prog_infos[i].jited_prog_len) ||
-			  (jit_enabled &&
+			  (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
+			  (env.jit_enabled &&
 			   !memcmp(jited_insns, zeros, sizeof(zeros))) ||
 			  !prog_infos[i].xlated_prog_len ||
 			  !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
@@ -121,7 +121,7 @@ void test_bpf_obj_id(void)
 			  err, errno, i,
 			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
 			  info_len, sizeof(struct bpf_prog_info),
-			  jit_enabled,
+			  env.jit_enabled,
 			  prog_infos[i].jited_prog_len,
 			  prog_infos[i].xlated_prog_len,
 			  !!memcmp(jited_insns, zeros, sizeof(zeros)),
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index ceddb8cc86f4..b59017279e0b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -4,12 +4,15 @@
 static int libbpf_debug_print(enum libbpf_print_level level,
 			      const char *format, va_list args)
 {
-	if (level != LIBBPF_DEBUG)
-		return vfprintf(stderr, format, args);
+	if (level != LIBBPF_DEBUG) {
+		test__vprintf(format, args);
+		return 0;
+	}
 
 	if (!strstr(format, "verifier log"))
 		return 0;
-	return vfprintf(stderr, "%s", args);
+	test__vprintf("%s", args);
+	return 0;
 }
 
 static int check_load(const char *file, enum bpf_prog_type type)
@@ -73,32 +76,38 @@ void test_bpf_verif_scale(void)
 	libbpf_print_fn_t old_print_fn = NULL;
 	int err, i;
 
-	if (verifier_stats)
+	if (env.verifier_stats) {
+		test__force_log();
 		old_print_fn = libbpf_set_print(libbpf_debug_print);
+	}
 
 	err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
-	printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
+	test__printf("test_scale:loop3:%s\n",
+		     err ? (error_cnt--, "OK") : "FAIL");
 
 	for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
 		err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
-		printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
+		test__printf("test_scale:%s:%s\n", sched_cls[i],
+			     err ? "FAIL" : "OK");
 	}
 
 	for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
 		err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
-		printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
+		test__printf("test_scale:%s:%s\n", raw_tp[i],
+			     err ? "FAIL" : "OK");
 	}
 
 	for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
 		err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
-		printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
+		test__printf("test_scale:%s:%s\n", cg_sysctl[i],
+			     err ? "FAIL" : "OK");
 	}
 	err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
-	printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
+	test__printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
 
 	err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
-	printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
+	test__printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
 
-	if (verifier_stats)
+	if (env.verifier_stats)
 		libbpf_set_print(old_print_fn);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 9d73a8f932ac..3d59b3c841fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -41,7 +41,7 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
 		 * just assume it is good if the stack is not empty.
 		 * This could be improved in the future.
 		 */
-		if (jit_enabled) {
+		if (env.jit_enabled) {
 			found = num_stack > 0;
 		} else {
 			for (i = 0; i < num_stack; i++) {
@@ -58,7 +58,7 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
 		}
 	} else {
 		num_stack = e->kern_stack_size / sizeof(__u64);
-		if (jit_enabled) {
+		if (env.jit_enabled) {
 			good_kern_stack = num_stack > 0;
 		} else {
 			for (i = 0; i < num_stack; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 20ddca830e68..5ce572c03a5f 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -74,7 +74,7 @@ static void test_l4lb(const char *file)
 	}
 	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
 		error_cnt++;
-		printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
+		test__printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
 	}
 out:
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index ee99368c595c..2e78217ed3fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -9,12 +9,12 @@ static void *parallel_map_access(void *arg)
 	for (i = 0; i < 10000; i++) {
 		err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK);
 		if (err) {
-			printf("lookup failed\n");
+			test__printf("lookup failed\n");
 			error_cnt++;
 			goto out;
 		}
 		if (vars[0] != 0) {
-			printf("lookup #%d var[0]=%d\n", i, vars[0]);
+			test__printf("lookup #%d var[0]=%d\n", i, vars[0]);
 			error_cnt++;
 			goto out;
 		}
@@ -22,8 +22,8 @@ static void *parallel_map_access(void *arg)
 		for (j = 2; j < 17; j++) {
 			if (vars[j] == rnd)
 				continue;
-			printf("lookup #%d var[1]=%d var[%d]=%d\n",
-			       i, rnd, j, vars[j]);
+			test__printf("lookup #%d var[1]=%d var[%d]=%d\n",
+				     i, rnd, j, vars[j]);
 			error_cnt++;
 			goto out;
 		}
@@ -43,7 +43,7 @@ void test_map_lock(void)
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
 	if (err) {
-		printf("test_map_lock:bpf_prog_load errno %d\n", errno);
+		test__printf("test_map_lock:bpf_prog_load errno %d\n", errno);
 		goto close_prog;
 	}
 	map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 54218ee3c004..d950f4558897 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -202,8 +202,8 @@ static int test_send_signal_nmi(void)
 			 -1 /* cpu */, -1 /* group_fd */, 0 /* flags */);
 	if (pmu_fd == -1) {
 		if (errno == ENOENT) {
-			printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
-				__func__);
+			test__printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
+				     __func__);
 			return 0;
 		}
 		/* Let the test fail with a more informative message */
@@ -222,8 +222,4 @@ void test_send_signal(void)
 	ret |= test_send_signal_tracepoint();
 	ret |= test_send_signal_perf();
 	ret |= test_send_signal_nmi();
-	if (!ret)
-		printf("test_send_signal:OK\n");
-	else
-		printf("test_send_signal:FAIL\n");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 114ebe6a438e..deb2db5b85b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -12,7 +12,7 @@ void test_spinlock(void)
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
 	if (err) {
-		printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
+		test__printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
 		goto close_prog;
 	}
 	for (i = 0; i < 4; i++)
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index ac44fda84833..356d2c017a9c 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -109,8 +109,8 @@ void test_stacktrace_build_id(void)
 	if (build_id_matches < 1 && retry--) {
 		bpf_link__destroy(link);
 		bpf_object__close(obj);
-		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
-		       __func__);
+		test__printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+			     __func__);
 		goto retry;
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 9557b7dfb782..f44f2c159714 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -140,8 +140,8 @@ void test_stacktrace_build_id_nmi(void)
 	if (build_id_matches < 1 && retry--) {
 		bpf_link__destroy(link);
 		bpf_object__close(obj);
-		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
-		       __func__);
+		test__printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+			     __func__);
 		goto retry;
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index 09e6b46f5515..b5404494b8aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -75,7 +75,8 @@ void test_xdp_noinline(void)
 	}
 	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
 		error_cnt++;
-		printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+		test__printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
+			     bytes, pkts);
 	}
 out:
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 94b6951b90b3..3cf3ebda1d31 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -6,9 +6,75 @@
 #include <argp.h>
 #include <string.h>
 
+/* defined in test_progs.h */
+struct test_env env = {
+	.test_num_selector = -1,
+};
 int error_cnt, pass_cnt;
-bool jit_enabled;
-bool verifier_stats = false;
+
+struct prog_test_def {
+	const char *test_name;
+	int test_num;
+	void (*run_test)(void);
+	bool force_log;
+	int pass_cnt;
+	int error_cnt;
+	bool tested;
+};
+
+void test__force_log() {
+	env.test->force_log = true;
+}
+
+void test__vprintf(const char *fmt, va_list args)
+{
+	size_t rem_sz;
+	int ret;
+
+	if (env.verbose || (env.test && env.test->force_log)) {
+		vfprintf(stderr, fmt, args);
+		return;
+	}
+
+try_again:
+	rem_sz = env.log_cap - env.log_cnt;
+	if (rem_sz) {
+		ret = vsnprintf(env.log_buf + env.log_cnt, rem_sz, fmt, args);
+		if (ret < 0) {
+			fprintf(stderr, "failed to log message w/ fmt '%s'\n", fmt);
+			return;
+		}
+	}
+
+	if (ret >= rem_sz) {
+		size_t new_sz = env.log_cap * 3 / 2;
+		char *new_buf;
+
+		if (new_sz < 4096)
+			new_sz = 4096;
+
+		new_buf = realloc(env.log_buf, new_sz);
+		if (!new_buf) {
+			fprintf(stderr, "failed to realloc log buffer: %d\n",
+				errno);
+			return;
+		}
+		env.log_buf = new_buf;
+		env.log_cap = new_sz;
+		goto try_again;
+	}
+
+	env.log_cnt += ret + 1;
+}
+
+void test__printf(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	test__vprintf(fmt, args);
+	va_end(args);
+}
 
 struct ipv4_packet pkt_v4 = {
 	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -163,20 +229,15 @@ void *spin_lock_thread(void *arg)
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 
-struct prog_test_def {
-	const char *test_name;
-	int test_num;
-	void (*run_test)(void);
-};
-
 static struct prog_test_def prog_test_defs[] = {
-#define DEFINE_TEST(name) {	      \
-	.test_name = #name,	      \
-	.run_test = &test_##name,   \
+#define DEFINE_TEST(name) {		\
+	.test_name = #name,		\
+	.run_test = &test_##name,	\
 },
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 };
+const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
 
 const char *argp_program_version = "test_progs 0.1";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
@@ -186,7 +247,6 @@ enum ARG_KEYS {
 	ARG_TEST_NUM = 'n',
 	ARG_TEST_NAME = 't',
 	ARG_VERIFIER_STATS = 's',
-
 	ARG_VERBOSE = 'v',
 };
 	
@@ -202,24 +262,13 @@ static const struct argp_option opts[] = {
 	{},
 };
 
-struct test_env {
-	int test_num_selector;
-	const char *test_name_selector;
-	bool verifier_stats;
-	bool verbose;
-	bool very_verbose;
-};
-
-static struct test_env env = {
-	.test_num_selector = -1,
-};
-
 static int libbpf_print_fn(enum libbpf_print_level level,
 			   const char *format, va_list args)
 {
 	if (!env.very_verbose && level == LIBBPF_DEBUG)
 		return 0;
-	return vfprintf(stderr, format, args);
+	test__vprintf(format, args);
+	return 0;
 }
 
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
@@ -267,7 +316,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -275,7 +323,6 @@ int main(int argc, char **argv)
 		.parser = parse_arg,
 		.doc = argp_program_doc,
 	};
-	struct prog_test_def *test;
 	int err, i;
 
 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -286,13 +333,14 @@ int main(int argc, char **argv)
 
 	srand(time(NULL));
 
-	jit_enabled = is_jit_enabled();
+	env.jit_enabled = is_jit_enabled();
 
-	verifier_stats = env.verifier_stats;
-
-	for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) {
-		test = &prog_test_defs[i];
+	for (i = 0; i < prog_test_cnt; i++) {
+		struct prog_test_def *test = &prog_test_defs[i];
+		int old_pass_cnt = pass_cnt;
+		int old_error_cnt = error_cnt;
 
+		env.test = test;
 		test->test_num = i + 1;
 
 		if (env.test_num_selector >= 0 &&
@@ -303,8 +351,29 @@ int main(int argc, char **argv)
 			continue;
 
 		test->run_test();
+		test->tested = true;
+		test->pass_cnt = pass_cnt - old_pass_cnt;
+		test->error_cnt = error_cnt - old_error_cnt;
+		if (test->error_cnt)
+			env.fail_cnt++;
+		else
+			env.succ_cnt++;
+
+		if (env.verbose || test->force_log || test->error_cnt) {
+			if (env.log_cnt) {
+				fprintf(stdout, "%s", env.log_buf);
+				if (env.log_buf[env.log_cnt - 1] != '\n')
+					fprintf(stdout, "\n");
+			}
+		}
+		env.log_cnt = 0;
+
+		printf("#%d %s:%s\n", test->test_num, test->test_name,
+		       test->error_cnt ? "FAIL" : "OK");
 	}
+	printf("Summary: %d PASSED, %d FAILED\n", env.succ_cnt, env.fail_cnt);
+
+	free(env.log_buf);
 
-	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
 }
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 49e0f7d85643..62f55a4231e9 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -38,9 +38,33 @@ typedef __u16 __sum16;
 #include "trace_helpers.h"
 #include "flow_dissector_load.h"
 
-extern int error_cnt, pass_cnt;
-extern bool jit_enabled;
-extern bool verifier_stats;
+struct prog_test_def;
+
+struct test_env {
+	int test_num_selector;
+	const char *test_name_selector;
+	bool verifier_stats;
+	bool verbose;
+	bool very_verbose;
+
+	bool jit_enabled;
+
+	struct prog_test_def *test;
+	char *log_buf;
+	size_t log_cnt;
+	size_t log_cap;
+
+	int succ_cnt;
+	int fail_cnt;
+};
+
+extern int error_cnt;
+extern int pass_cnt;
+extern struct test_env env;
+
+extern void test__printf(const char *fmt, ...);
+extern void test__vprintf(const char *fmt, va_list args);
+extern void test__force_log();
 
 #define MAGIC_BYTES 123
 
@@ -64,11 +88,12 @@ extern struct ipv6_packet pkt_v6;
 	int __ret = !!(condition);					\
 	if (__ret) {							\
 		error_cnt++;						\
-		printf("%s:FAIL:%s ", __func__, tag);			\
-		printf(format);						\
+		test__printf("%s:FAIL:%s ", __func__, tag);		\
+		test__printf(format);					\
 	} else {							\
 		pass_cnt++;						\
-		printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+		test__printf("%s:PASS:%s %d nsec\n",			\
+			      __func__, tag, duration);			\
 	}								\
 	__ret;								\
 })
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 bpf-next 1/9] selftests/bpf: prevent headers to be compiled as C code
From: Andrii Nakryiko @ 2019-07-27 19:01 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, sdf
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190727190150.649137-1-andriin@fb.com>

Apprently listing header as a normal dependency for a binary output
makes it go through compilation as if it was C code. This currently
works without a problem, but in subsequent commits causes problems for
differently generated test.h for test_progs. Marking those headers as
order-only dependency solves the issue.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 11c9c62c3362..bb66cc4a7f34 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -235,7 +235,7 @@ PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h
 PROG_TESTS_FILES := $(wildcard prog_tests/*.c)
 test_progs.c: $(PROG_TESTS_H)
 $(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
-$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES)
+$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(PROG_TESTS_H)
 $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR)
 	$(shell ( cd prog_tests/; \
 		  echo '/* Generated header, do not edit */'; \
@@ -256,7 +256,7 @@ MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h
 MAP_TESTS_FILES := $(wildcard map_tests/*.c)
 test_maps.c: $(MAP_TESTS_H)
 $(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS)
-$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES)
+$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_FILES) | $(MAP_TESTS_H)
 $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR)
 	$(shell ( cd map_tests/; \
 		  echo '/* Generated header, do not edit */'; \
@@ -277,7 +277,7 @@ VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h
 VERIFIER_TEST_FILES := $(wildcard verifier/*.c)
 test_verifier.c: $(VERIFIER_TESTS_H)
 $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
-$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H)
+$(OUTPUT)/test_verifier: test_verifier.c | $(VERIFIER_TEST_FILES) $(VERIFIER_TESTS_H)
 $(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR)
 	$(shell ( cd verifier/; \
 		  echo '/* Generated header, do not edit */'; \
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 bpf-next 2/9] selftests/bpf: revamp test_progs to allow more control
From: Andrii Nakryiko @ 2019-07-27 19:01 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, sdf
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190727190150.649137-1-andriin@fb.com>

Refactor test_progs to allow better control on what's being run.
Also use argp to do argument parsing, so that it's easier to keep adding
more options.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/Makefile     |  8 +--
 tools/testing/selftests/bpf/test_progs.c | 84 +++++++++++++++++++++---
 2 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index bb66cc4a7f34..3bd0f4a0336a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,14 +239,8 @@ $(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(PROG_TESTS_H)
 $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR)
 	$(shell ( cd prog_tests/; \
 		  echo '/* Generated header, do not edit */'; \
-		  echo '#ifdef DECLARE'; \
 		  ls *.c 2> /dev/null | \
-			sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \
-		  echo '#endif'; \
-		  echo '#ifdef CALL'; \
-		  ls *.c 2> /dev/null | \
-			sed -e 's@\([^\.]*\)\.c@test_\1();@'; \
-		  echo '#endif' \
+			sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \
 		 ) > $(PROG_TESTS_H))
 
 MAP_TESTS_DIR = $(OUTPUT)/map_tests
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index dae0819b1141..eea88ba59225 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -3,6 +3,7 @@
  */
 #include "test_progs.h"
 #include "bpf_rlimit.h"
+#include <argp.h>
 
 int error_cnt, pass_cnt;
 bool jit_enabled;
@@ -156,22 +157,89 @@ void *spin_lock_thread(void *arg)
 	pthread_exit(arg);
 }
 
-#define DECLARE
+/* extern declarations for test funcs */
+#define DEFINE_TEST(name) extern void test_##name();
 #include <prog_tests/tests.h>
-#undef DECLARE
+#undef DEFINE_TEST
 
-int main(int ac, char **av)
+struct prog_test_def {
+	const char *test_name;
+	void (*run_test)(void);
+};
+
+static struct prog_test_def prog_test_defs[] = {
+#define DEFINE_TEST(name) {	      \
+	.test_name = #name,	      \
+	.run_test = &test_##name,   \
+},
+#include <prog_tests/tests.h>
+#undef DEFINE_TEST
+};
+
+const char *argp_program_version = "test_progs 0.1";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] = "BPF selftests test runner";
+
+enum ARG_KEYS {
+	ARG_VERIFIER_STATS = 's',
+};
+	
+static const struct argp_option opts[] = {
+	{ "verifier-stats", ARG_VERIFIER_STATS, NULL, 0,
+	  "Output verifier statistics", },
+	{},
+};
+
+struct test_env {
+	bool verifier_stats;
+};
+
+static struct test_env env = {};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
+	struct test_env *env = state->input;
+
+	switch (key) {
+	case ARG_VERIFIER_STATS:
+		env->verifier_stats = true;
+		break;
+	case ARGP_KEY_ARG:
+		argp_usage(state);
+		break;
+	case ARGP_KEY_END:
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+
+int main(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	const struct prog_test_def *def;
+	int err, i;
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
+	if (err)
+		return err;
+
 	srand(time(NULL));
 
 	jit_enabled = is_jit_enabled();
 
-	if (ac == 2 && strcmp(av[1], "-s") == 0)
-		verifier_stats = true;
+	verifier_stats = env.verifier_stats;
 
-#define CALL
-#include <prog_tests/tests.h>
-#undef CALL
+	for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) {
+		def = &prog_test_defs[i];
+		def->run_test();
+	}
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 bpf-next 3/9] selftests/bpf: add test selectors by number and name to test_progs
From: Andrii Nakryiko @ 2019-07-27 19:01 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, sdf
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
In-Reply-To: <20190727190150.649137-1-andriin@fb.com>

Add ability to specify either test number or test name substring to
narrow down a set of test to run.

Usage:
sudo ./test_progs -n 1
sudo ./test_progs -t attach_probe

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/test_progs.c | 43 +++++++++++++++++++++---
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index eea88ba59225..6e04b9f83777 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -4,6 +4,7 @@
 #include "test_progs.h"
 #include "bpf_rlimit.h"
 #include <argp.h>
+#include <string.h>
 
 int error_cnt, pass_cnt;
 bool jit_enabled;
@@ -164,6 +165,7 @@ void *spin_lock_thread(void *arg)
 
 struct prog_test_def {
 	const char *test_name;
+	int test_num;
 	void (*run_test)(void);
 };
 
@@ -181,26 +183,49 @@ const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 const char argp_program_doc[] = "BPF selftests test runner";
 
 enum ARG_KEYS {
+	ARG_TEST_NUM = 'n',
+	ARG_TEST_NAME = 't',
 	ARG_VERIFIER_STATS = 's',
 };
 	
 static const struct argp_option opts[] = {
+	{ "num", ARG_TEST_NUM, "NUM", 0,
+	  "Run test number NUM only " },
+	{ "name", ARG_TEST_NAME, "NAME", 0,
+	  "Run tests with names containing NAME" },
 	{ "verifier-stats", ARG_VERIFIER_STATS, NULL, 0,
 	  "Output verifier statistics", },
 	{},
 };
 
 struct test_env {
+	int test_num_selector;
+	const char *test_name_selector;
 	bool verifier_stats;
 };
 
-static struct test_env env = {};
+static struct test_env env = {
+	.test_num_selector = -1,
+};
 
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
 	struct test_env *env = state->input;
 
 	switch (key) {
+	case ARG_TEST_NUM: {
+		int test_num;
+
+		errno = 0;
+		test_num = strtol(arg, NULL, 10);
+		if (errno)
+			return -errno;
+		env->test_num_selector = test_num;
+		break;
+	}
+	case ARG_TEST_NAME:
+		env->test_name_selector = arg;
+		break;
 	case ARG_VERIFIER_STATS:
 		env->verifier_stats = true;
 		break;
@@ -223,7 +248,7 @@ int main(int argc, char **argv)
 		.parser = parse_arg,
 		.doc = argp_program_doc,
 	};
-	const struct prog_test_def *def;
+	struct prog_test_def *test;
 	int err, i;
 
 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -237,8 +262,18 @@ int main(int argc, char **argv)
 	verifier_stats = env.verifier_stats;
 
 	for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) {
-		def = &prog_test_defs[i];
-		def->run_test();
+		test = &prog_test_defs[i];
+
+		test->test_num = i + 1;
+
+		if (env.test_num_selector >= 0 &&
+		    test->test_num != env.test_num_selector)
+			continue;
+		if (env.test_name_selector &&
+		    !strstr(test->test_name, env.test_name_selector))
+			continue;
+
+		test->run_test();
 	}
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 bpf-next 0/9] Revamp test_progs as a test running framework
From: Andrii Nakryiko @ 2019-07-27 19:01 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, sdf
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko

This patch set makes a number of changes to test_progs selftest, which is
a collection of many other tests (and sometimes sub-tests as well), to provide
better testing experience and allow to start convering many individual test
programs under selftests/bpf into a single and convenient test runner.

Patch #1 fixes issue with Makefile, which makes prog_tests/test.h compiled as
a C code. This fix allows to change how test.h is generated, providing ability
to have more control on what and how tests are run.

Patch #2 changes how test.h is auto-generated, which allows to have test
definitions, instead of just running test functions. This gives ability to do
more complicated test run policies.

Patch #3 adds `-t <test-name>` and `-n <test-num>` selectors to run only
subset of tests.

Patch #4 changes libbpf_set_print() to return previously set print callback,
allowing to temporarily replace current print callback and then set it back.
This is necessary for some tests that want more control over libbpf logging.

Patch #5 sets up and takes over libbpf logging from individual tests to
test_prog runner, adding -vv verbosity to capture debug output from libbpf.
This is useful when debugging failing tests.

Patch #6 furthers test output management and buffers it by default, emitting
log output only if test fails. This give succinct and clean default test
output. It's possible to bypass this behavior with -v flag, which will turn
off test output buffering.

Patch #7 adds support for sub-tests. It also enhances -t and -n selectors to
both support ability to specify sub-test selectors, as well as enhancing
number selector to accept sets of test, instead of just individual test
number.

Patch #8 converts bpf_verif_scale.c test to use sub-test APIs.

Patch #9 converts send_signal.c tests to use sub-test APIs.

v1->v2:
  - drop libbpf_swap_print, instead return previous function from
    libbpf_set_print (Stanislav);

Andrii Nakryiko (9):
  selftests/bpf: prevent headers to be compiled as C code
  selftests/bpf: revamp test_progs to allow more control
  selftests/bpf: add test selectors by number and name to test_progs
  libbpf: return previous print callback from libbpf_set_print
  selftest/bpf: centralize libbpf logging management for test_progs
  selftests/bpf: abstract away test log output
  selftests/bpf: add sub-tests support for test_progs
  selftests/bpf: convert bpf_verif_scale.c to sub-tests API
  selftests/bpf: convert send_signal.c to use subtests

 tools/lib/bpf/libbpf.c                        |   5 +-
 tools/lib/bpf/libbpf.h                        |   2 +-
 tools/testing/selftests/bpf/Makefile          |  14 +-
 .../selftests/bpf/prog_tests/bpf_obj_id.c     |   6 +-
 .../bpf/prog_tests/bpf_verif_scale.c          |  90 +++--
 .../bpf/prog_tests/get_stack_raw_tp.c         |   4 +-
 .../selftests/bpf/prog_tests/l4lb_all.c       |   2 +-
 .../selftests/bpf/prog_tests/map_lock.c       |  10 +-
 .../bpf/prog_tests/reference_tracking.c       |  15 +-
 .../selftests/bpf/prog_tests/send_signal.c    |  17 +-
 .../selftests/bpf/prog_tests/spinlock.c       |   2 +-
 .../bpf/prog_tests/stacktrace_build_id.c      |   4 +-
 .../bpf/prog_tests/stacktrace_build_id_nmi.c  |   4 +-
 .../selftests/bpf/prog_tests/xdp_noinline.c   |   3 +-
 tools/testing/selftests/bpf/test_progs.c      | 373 +++++++++++++++++-
 tools/testing/selftests/bpf/test_progs.h      |  45 ++-
 16 files changed, 492 insertions(+), 104 deletions(-)

-- 
2.17.1


^ permalink raw reply

* Re: [PATCH bpf-next 02/10] libbpf: implement BPF CO-RE offset relocation algorithm
From: Song Liu @ 2019-07-27 18:59 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Andrii Nakryiko, bpf, Networking, Alexei Starovoitov,
	Daniel Borkmann, Yonghong Song, Kernel Team
In-Reply-To: <CAEf4BzZKA29xudKC8WWEXJq+egTCgX4bV9KaE0Y+_u50=D70iQ@mail.gmail.com>



> On Jul 26, 2019, at 11:11 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Thu, Jul 25, 2019 at 12:32 PM Song Liu <songliubraving@fb.com> wrote:
>> 
>> 
>> 
>>> On Jul 24, 2019, at 12:27 PM, Andrii Nakryiko <andriin@fb.com> wrote:
>>> 
>>> This patch implements the core logic for BPF CO-RE offsets relocations.
>>> All the details are described in code comments.
>> 
>> Some description in the change log is still useful. Please at least
>> copy-paste key comments here.
> 
> OK, will add some more.
> 
>> 
>> And, this is looooong. I think it is totally possible to split it into
>> multiple smaller patches.
> 
> I don't really know how to split it further without hurting reviewing
> by artificially splitting related code into separate patches. Remove
> any single function and algorithm will be incomplete.
> 
> Let me give you some high-level overview of how pieces are put
> together. There are 9 non-trivial functions, let's go over their
> purpose in the orderd in which they are defined in file:
> 
> 1. bpf_core_spec_parse()
> 
> This one take bpf_offset_reloc's type_id and accessor string
> ("0:1:2:3") and parses it into more convenient bpf_core_spec
> datastructure, which has calculated offset and high-level spec
> "steps": either named field or array access.
> 
> 2. bpf_core_find_cands()
> 
> Given local type name, finds all possible target BTF types with same
> name (modulo "flavor" differences, ___flavor suffix is just ignored).
> 
> 3. bpf_core_fields_are_compat()
> 
> Given local and target field match, checks that their types are
> compatible (so that we don't accidentally match, e.g., int against
> struct).
> 
> 4. bpf_core_match_member()
> 
> Given named local field, find corresponding field in target struct. To
> understand why it's not trivial, here's an example:
> 
> Local type:
> 
> struct s___local {
>  int a;
> };
> 
> Target type:
> 
> struct s___target {
>  struct {
>    union {
>      int a;
>    };
>  };
> };
> 
> For both cases you can access a as s.a, but in local case, field a is
> immediately inside s___local, while for s___target, you have to
> traverse two levels deeper into anonymous fields to get to an `a`
> inside anonymous union.
> 
> So this function find that `a` by doing exhaustive search across all
> named field and anonymous struct/unions. But otherwise it's pretty
> straightforward recursive function.
> 
> bpf_core_spec_match()
> 
> Just goes over high-level spec steps in local spec and tries to figure
> out both high-level and low-level steps for targe type. Consider the
> above example. For both structs accessing s.a is one high-level step,
> but for s___local it's single low-level step (just another :0 in spec
> string), while for s___target it's three low-level steps: ":0:0:0",
> one step for each BTF type we need to traverse.
> 
> Array access is simpler, it's always one high-level and one low-level step.
> 
> bpf_core_reloc_insn()
> 
> Once we match local and target specs and have local and target
> offsets, do the relocations - check that instruction has expected
> local offset and replace it with target offset.
> 
> bpf_core_find_kernel_btf()
> 
> This is the only function that can be moved into separate patch, but
> it's also very simple. It just iterates over few known possible
> locations for vmlinux image and once found, tries to parse .BTF out of
> it, to be used as target BTF.
> 
> bpf_core_reloc_offset()
> 
> It combines all the above functions to perform single relocation.
> Parse spec, get candidates, for each candidate try to find matching
> target spec. All candidates that matched are cached for given local
> root type.

Thanks for these explanation. They are really helpful. 

I think some example explaining each step of bpf_core_reloc_offset()
will be very helpful. Something like:

Example:

struct s {
	int a;
	struct {
		int b;
		bool c;
	};
};

To get offset for c, we do:

bpf_core_reloc_offset() {
	
	/* input data: xxx */

	/* first step: bpf_core_spec_parse() */

	/* data after first step */

	/* second step: bpf_core_find_cands() */

	/* candidate A and B after second step */

	...
}

Well, it requires quite some work to document this way. Please let me 
know if you feel this is an overkill. 

> 
> bpf_core_reloc_offsets()
> 
> High-level coordination. Iterate over all per-program .BTF.ext offset
> reloc sections, each relocation within them. Find corresponding
> program and try to apply relocations one by one.
> 
> 
> I think the only non-obvious part here is to understand that
> relocation records local raw spec with every single anonymous type
> traversal, which is not that useful when we try to match it against
> target type, which can have very different composition, but still the
> same field access pattern, from C language standpoint (which hides all
> those anonymous type traversals from programmer).
> 
> But it should be pretty clear now, plus also check tests, they have
> lots of cases showing what's compatible and what's not.

I see. I will review the tests. 

>>> 
>>> static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
>>> -                                                  __u32 id)
>>> +                                                  __u32 id,
>>> +                                                  __u32 *res_id)
>>> {
>>>      const struct btf_type *t = btf__type_by_id(btf, id);
>> 
>> Maybe have a local "__u32 rid;"
>> 
>>> 
>>> +     if (res_id)
>>> +             *res_id = id;
>>> +
>> 
>> and do "rid = id;" here
>> 
>>>      while (true) {
>>>              switch (BTF_INFO_KIND(t->info)) {
>>>              case BTF_KIND_VOLATILE:
>>>              case BTF_KIND_CONST:
>>>              case BTF_KIND_RESTRICT:
>>>              case BTF_KIND_TYPEDEF:
>>> +                     if (res_id)
>>> +                             *res_id = t->type;
>> and here
>> 
>>>                      t = btf__type_by_id(btf, t->type);
>>>                      break;
>>>              default:
>> and "*res_id = rid;" right before return?
> 
> Sure, but why?

I think it is cleaner that way. But feel free to ignore if you
think otherwise. 

> 
>> 
>>> @@ -1041,7 +1049,7 @@ static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
>>> static bool get_map_field_int(const char *map_name, const struct btf *btf,
>>>                            const struct btf_type *def,
>>>                            const struct btf_member *m, __u32 *res) {
> 
> [...]
> 
>>> +struct bpf_core_spec {
>>> +     const struct btf *btf;
>>> +     /* high-level spec: named fields and array indicies only */
>> 
>> typo: indices
> 
> thanks!
> 
>> 
>>> +     struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
>>> +     /* high-level spec length */
>>> +     int len;
>>> +     /* raw, low-level spec: 1-to-1 with accessor spec string */
>>> +     int raw_spec[BPF_CORE_SPEC_MAX_LEN];
>>> +     /* raw spec length */
>>> +     int raw_len;
>>> +     /* field byte offset represented by spec */
>>> +     __u32 offset;
>>> +};
> 
> [...]
> 
>>> + *
>>> + *   int x = &s->a[3]; // access string = '0:1:2:3'
>>> + *
>>> + * Low-level spec has 1:1 mapping with each element of access string (it's
>>> + * just a parsed access string representation): [0, 1, 2, 3].
>>> + *
>>> + * High-level spec will capture only 3 points:
>>> + *   - intial zero-index access by pointer (&s->... is the same as &s[0]...);
>>> + *   - field 'a' access (corresponds to '2' in low-level spec);
>>> + *   - array element #3 access (corresponds to '3' in low-level spec).
>>> + *
>>> + */
>> 
>> IIUC, high-level points are subset of low-level points. How about we introduce
>> "anonymous" high-level points, so that high-level points and low-level points
>> are 1:1 mapping?
> 
> No, that will just hurt and complicate things. See above explanation
> about why we need high-level points (it's what you as C programmer try
> to achieve vs low-level spec is what C-language does in reality, with
> all the anonymous struct/union traversal).
> 
> What's wrong with this separation? Think about it as recording
> "intent" (high-level spec) vs "mechanics" (low-level spec, how exactly
> to achieve that intent, in excruciating details).

There is nothing wrong with separation. I just personally think it is 
cleaner the other way. That's why I raised the question. 

I will go with your assessment, as you looked into this much more than 
I did. :-)

[...]

>>> +
>>> +     memset(spec, 0, sizeof(*spec));
>>> +     spec->btf = btf;
>>> +
>>> +     /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
>>> +     while (*spec_str) {
>>> +             if (*spec_str == ':')
>>> +                     ++spec_str;
>>> +             if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
>>> +                     return -EINVAL;
>>> +             if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
>>> +                     return -E2BIG;
>>> +             spec_str += parsed_len;
>>> +             spec->raw_spec[spec->raw_len++] = access_idx;
>>> +     }
>>> +
>>> +     if (spec->raw_len == 0)
>>> +             return -EINVAL;
>>> +
>>> +     for (i = 0; i < spec->raw_len; i++) {
>>> +             t = skip_mods_and_typedefs(btf, id, &id);
>>> +             if (!t)
>>> +                     return -EINVAL;
>>> +
>>> +             access_idx = spec->raw_spec[i];
>>> +
>>> +             if (i == 0) {
>>> +                     /* first spec value is always reloc type array index */
>>> +                     spec->spec[spec->len].type_id = id;
>>> +                     spec->spec[spec->len].idx = access_idx;
>>> +                     spec->len++;
>>> +
>>> +                     sz = btf__resolve_size(btf, id);
>>> +                     if (sz < 0)
>>> +                             return sz;
>>> +                     spec->offset += access_idx * sz;
>>          spec->offset = access_idx * sz;  should be enough
> 
> No. spec->offset is carefully maintained across multiple low-level
> steps, as we traverse down embedded structs/unions.
> 
> Think about, e.g.:
> 
> struct s {
>    int a;
>    struct {
>        int b;
>    };
> };
> 
> Imagine you are trying to match s.b access. With what you propose
> you'll end up with offset 0, but it should be 4.

Hmm... this is just for i == 0, right? Which line updated spec->offset
after "memset(spec, 0, sizeof(*spec));"?

> 
>> 
>>> +                     continue;
>>> +             }
>> 
>> Maybe pull i == 0 case out of the for loop?
>> 
>>> +
>>> +             if (btf_is_composite(t)) {
> 
> [...]
> 
>>> +
>>> +     if (spec->len == 0)
>>> +             return -EINVAL;
>> 
>> Can this ever happen?
> 
> Not really, because I already check raw_len == 0 and exit with error.
> I'll remove.
> 
>> 
>>> +
>>> +     return 0;
>>> +}
>>> +
> 
> [...]
> 
>>> +
>>> +/*
>>> + * Given single high-level accessor (either named field or array index) in
>>> + * local type, find corresponding high-level accessor for a target type. Along
>>> + * the way, maintain low-level spec for target as well. Also keep updating
>>> + * target offset.
>>> + */
>> 
>> Please describe the recursive algorithm here. I am kinda lost.
> 
> Explained above. I'll extend description a bit. But it's just
> recursive exhaustive search:
> 1. if struct field is anonymous and is struct/union, go one level
> deeper and try to find field with given name inside those.
> 2. if field has name and it matched what we are searching - check type
> compatibility. It has to be compatible, so if it's not, then it's not
> a match.
> 
>> Also, please document the meaning of zero, positive, negative return values.
> 
> Ok. It's standard <0 - error, 0 - false, 1 - true.
> 
>> 
>>> +static int bpf_core_match_member(const struct btf *local_btf,
>>> +                              const struct bpf_core_accessor *local_acc,
>>> +                              const struct btf *targ_btf,
>>> +                              __u32 targ_id,
>>> +                              struct bpf_core_spec *spec,
>>> +                              __u32 *next_targ_id)
>>> +{
> 
> [...]
> 
>>> +             if (local_acc->name) {
>>> +                     if (!btf_is_composite(targ_type))
>>> +                             return 0;
>>> +
>>> +                     matched = bpf_core_match_member(local_spec->btf,
>>> +                                                     local_acc,
>>> +                                                     targ_btf, targ_id,
>>> +                                                     targ_spec, &targ_id);
>>> +                     if (matched <= 0)
>>> +                             return matched;
>>> +             } else {
>>> +                     /* for i=0, targ_id is already treated as array element
>>> +                      * type (because it's the original struct), for others
>>> +                      * we should find array element type first
>>> +                      */
>>> +                     if (i > 0) {
>> 
>> i == 0 case would go into "if (local_acc->name)" branch, no?
> 
> No, i == 0 is always an array access. s->a.b.c is the same as
> s[0].a.b.c, so relocation's first spec element is always either zero
> for pointer access or any non-negative index for array access. But it
> is always array access.

I see. Thanks for the explanation.

Song

^ permalink raw reply

* Re: [PATCH bpf-next 6/9] selftests/bpf: abstract away test log output
From: Andrii Nakryiko @ 2019-07-27 18:56 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Andrii Nakryiko, bpf, Networking, Alexei Starovoitov,
	Daniel Borkmann, Kernel Team
In-Reply-To: <20190726222652.GG24397@mini-arch>

On Fri, Jul 26, 2019 at 3:26 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 07/26, Andrii Nakryiko wrote:
> > On Fri, Jul 26, 2019 at 2:31 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 07/26, Andrii Nakryiko wrote:
> > > > This patch changes how test output is printed out. By default, if test
> > > > had no errors, the only output will be a single line with test number,
> > > > name, and verdict at the end, e.g.:
> > > >
> > > >   #31 xdp:OK
> > > >
> > > > If test had any errors, all log output captured during test execution
> > > > will be output after test completes.
> > > >
> > > > It's possible to force output of log with `-v` (`--verbose`) option, in
> > > > which case output won't be buffered and will be output immediately.
> > > >
> > > > To support this, individual tests are required to use helper methods for
> > > > logging: `test__printf()` and `test__vprintf()`.
> > > >
> > > > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > > > ---
> > > >  .../selftests/bpf/prog_tests/bpf_obj_id.c     |   6 +-
> > > >  .../bpf/prog_tests/bpf_verif_scale.c          |  31 ++--
> > > >  .../bpf/prog_tests/get_stack_raw_tp.c         |   4 +-
> > > >  .../selftests/bpf/prog_tests/l4lb_all.c       |   2 +-
> > > >  .../selftests/bpf/prog_tests/map_lock.c       |  10 +-
> > > >  .../selftests/bpf/prog_tests/send_signal.c    |   8 +-
> > > >  .../selftests/bpf/prog_tests/spinlock.c       |   2 +-
> > > >  .../bpf/prog_tests/stacktrace_build_id.c      |   4 +-
> > > >  .../bpf/prog_tests/stacktrace_build_id_nmi.c  |   4 +-
> > > >  .../selftests/bpf/prog_tests/xdp_noinline.c   |   3 +-
> > > >  tools/testing/selftests/bpf/test_progs.c      | 135 +++++++++++++-----
> > > >  tools/testing/selftests/bpf/test_progs.h      |  37 ++++-
> > > >  12 files changed, 173 insertions(+), 73 deletions(-)
> > > >
> >
> > [...]
> >
> > > >               error_cnt++;
> > > > -             printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
> > > > +             test__printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
> > > #define printf(...) test__printf(...) in tests.h?
> > >
> > > A bit ugly, but no need to retrain everyone to use new printf wrappers.
> >
> > I try to reduce amount of magic and surprising things, not add new
> > ones :) I also led by example and converted all current instances of
> > printf usage to test__printf, so anyone new will just copy/paste good
> > example, hopefully. Even if not, this non-buffered output will be
> > immediately obvious to anyone who just runs `sudo ./test_progs`.
>
> [..]
> > And
> > author of new test with this problem should hopefully be the first and
> > the only one to catch and fix this.
> Yeah, that is my only concern, that regular printfs will eventually
> creep in. It's already confusing to go to/from printf/printk.

We should catch that in code review, but Alexei and Daniel will be the
last line of defense anywas, as they run test_progs before merging
stuff and will immediately notice extra non-buffered output, given
that successful output from test_progs is now very laconic.

>
> 2c:
>
> I'm coming from a perspective of tools/testing/selftests/kselftest.h
> which is supposed to be a generic framework with custom
> printf variants (ksft_print_msg), but I still see a bunch of tests
> calling printf :-/
>
>         grep -ril ksft_exit_fail_msg selftests/ | xargs -n1 grep -w printf
>
> Since we don't expect regular buffered io from the tests anyway
> it might be easier just to add a bit of magic and call it a day.
>
> > > >       }
> > > >  out:
> > > >       bpf_object__close(obj);
> > > > diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
> > > > index ee99368c595c..2e78217ed3fd 100644
> > > > --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
> > > > +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
> > > > @@ -9,12 +9,12 @@ static void *parallel_map_access(void *arg)
> >
> > [...]

^ permalink raw reply

* Re: [PATCH bpf-next 1/9] selftests/bpf: prevent headers to be compiled as C code
From: Andrii Nakryiko @ 2019-07-27 18:53 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Andrii Nakryiko, bpf, Networking, Alexei Starovoitov,
	Daniel Borkmann, Kernel Team
In-Reply-To: <20190726220119.GE24397@mini-arch>

On Fri, Jul 26, 2019 at 3:01 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 07/26, Andrii Nakryiko wrote:
> > On Fri, Jul 26, 2019 at 2:21 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 07/26, Andrii Nakryiko wrote:
> > > > Apprently listing header as a normal dependency for a binary output
> > > > makes it go through compilation as if it was C code. This currently
> > > > works without a problem, but in subsequent commits causes problems for
> > > > differently generated test.h for test_progs. Marking those headers as
> > > > order-only dependency solves the issue.
> > > Are you sure it will not result in a situation where
> > > test_progs/test_maps is not regenerated if tests.h is updated.
> > >
> > > If I read the following doc correctly, order deps make sense for
> > > directories only:
> > > https://www.gnu.org/software/make/manual/html_node/Prerequisite-Types.html
> > >
> > > Can you maybe double check it with:
> > > * make
> > > * add new prog_tests/test_something.c
> > > * make
> > > to see if the binary is regenerated with test_something.c?
> >
> > Yeah, tested that, it triggers test_progs rebuild.
> >
> > Ordering is still preserved, because test.h is dependency of
> > test_progs.c, which is dependency of test_progs binary, so that's why
> > it works.
> >
> > As to why .h file is compiled as C file, I have no idea and ideally
> > that should be fixed somehow.
> I guess that's because it's a prerequisite and we have a target that
> puts all prerequisites when calling CC:
>
> test_progs: a.c b.c tests.h
>         gcc a.c b.c tests.h -o test_progs
>
> So gcc compiles each input file.

If that's really a definition of the rule, then it seems not exactly
correct. What if some of the prerequisites are some object files,
directories, etc. I'd say the rule should only include .c files. I'll
add it to my TODO list to go and check how all this is defined
somewhere, but for now I'm leaving everything as is in this patch.

>
> I'm not actually sure why default dependency system that uses 'gcc -M'
> is not working for us (see scripts/Kbuild.include) and we need to manually
> add tests.h dependency. But that's outside of the scope..
>
> > I also started with just removing header as dependency completely
> > (because it's indirect dependency of test_progs.c), but that broke the
> > build logic. Dunno, too much magic... This works, tested many-many
> > times, so I was satisfied enough :)
> Yeah, that's my only concern, too much magic already and we add
> quite a bit more.
>
> > > Maybe fix the problem of header compilation by having '#ifndef
> > > DECLARE_TEST #define DECLARE_TEST() #endif' in tests.h instead?
> >
> > That's ugly, I'd like to avoid doing that.
> That's your call, but I'm not sure what's uglier: complicating already
> complex make rules or making a header self contained.

The right call is to fix selftests/bpf Makefile for good (there are
more issues, e.g., we rebuild all the prog_tests/*.c for alu32 tests,
even if we only modified test_progs.c), but that's for another patch
set, unless someone beats me and fixes that first.

>
> > > > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > > > ---
> > > >  tools/testing/selftests/bpf/Makefile | 6 +++---
> > > >  1 file changed, 3 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> > > > index 11c9c62c3362..bb66cc4a7f34 100644
> > > > --- a/tools/testing/selftests/bpf/Makefile
> > > > +++ b/tools/testing/selftests/bpf/Makefile
> > > > @@ -235,7 +235,7 @@ PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h
> > > >  PROG_TESTS_FILES := $(wildcard prog_tests/*.c)
> > > >  test_progs.c: $(PROG_TESTS_H)
> > > >  $(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
> > > > -$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES)
> > > > +$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(PROG_TESTS_H)
> > > >  $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR)
> > > >       $(shell ( cd prog_tests/; \
> > > >                 echo '/* Generated header, do not edit */'; \
> > > > @@ -256,7 +256,7 @@ MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h
> > > >  MAP_TESTS_FILES := $(wildcard map_tests/*.c)
> > > >  test_maps.c: $(MAP_TESTS_H)
> > > >  $(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS)
> > > > -$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES)
> > > > +$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_FILES) | $(MAP_TESTS_H)
> > > >  $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR)
> > > >       $(shell ( cd map_tests/; \
> > > >                 echo '/* Generated header, do not edit */'; \
> > > > @@ -277,7 +277,7 @@ VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h
> > > >  VERIFIER_TEST_FILES := $(wildcard verifier/*.c)
> > > >  test_verifier.c: $(VERIFIER_TESTS_H)
> > > >  $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
> > > > -$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H)
> > > > +$(OUTPUT)/test_verifier: test_verifier.c | $(VERIFIER_TEST_FILES) $(VERIFIER_TESTS_H)
> > > >  $(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR)
> > > >       $(shell ( cd verifier/; \
> > > >                 echo '/* Generated header, do not edit */'; \
> > > > --
> > > > 2.17.1
> > > >

^ permalink raw reply

* Re: [PATCH net-next 3/3] net: dsa: mt7530: Add support for port 5
From: Russell King - ARM Linux admin @ 2019-07-27 18:53 UTC (permalink / raw)
  To: René van Dorst
  Cc: netdev, frank-w, sean.wang, f.fainelli, davem, matthias.bgg,
	andrew, vivien.didelot, john, linux-mediatek, linux-mips, robh+dt,
	devicetree
In-Reply-To: <20190724192549.24615-4-opensource@vdorst.com>

On Wed, Jul 24, 2019 at 09:25:49PM +0200, René van Dorst wrote:
> Adding support for port 5.
> 
> Port 5 can muxed/interface to:
> - internal 5th GMAC of the switch; can be used as 2nd CPU port or as
>   extra port with an external phy for a 6th ethernet port.
> - internal PHY of port 0 or 4; Used in most applications so that port 0
>   or 4 is the WAN port and interfaces with the 2nd GMAC of the SOC.

...

> @@ -1381,15 +1506,19 @@ static void mt7530_phylink_validate(struct dsa_switch *ds, int port,
>  	phylink_set_port_modes(mask);
>  	phylink_set(mask, Autoneg);
>  
> -	if (state->interface != PHY_INTERFACE_MODE_TRGMII) {
> +	if (state->interface == PHY_INTERFACE_MODE_TRGMII) {
> +		phylink_set(mask, 1000baseT_Full);
> +	} else {
>  		phylink_set(mask, 10baseT_Half);
>  		phylink_set(mask, 10baseT_Full);
>  		phylink_set(mask, 100baseT_Half);
>  		phylink_set(mask, 100baseT_Full);
> -		phylink_set(mask, 1000baseT_Half);
> -	}
>  
> -	phylink_set(mask, 1000baseT_Full);
> +		if (state->interface != PHY_INTERFACE_MODE_MII) {
> +			phylink_set(mask, 1000baseT_Half);
> +			phylink_set(mask, 1000baseT_Full);
> +		}
> +	}

As port 5 could use an external PHY, and it supports gigabit speeds,
consider that the PHY may provide not only copper but also fiber
connectivity, so port 5 should probably also have 1000baseX modes
too, which would allow such a PHY to bridge the switch to fiber.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

^ permalink raw reply

* Re: [PATCH bpf-next 4/9] libbpf: add libbpf_swap_print to get previous print func
From: Andrii Nakryiko @ 2019-07-27 18:49 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Stanislav Fomichev, Andrii Nakryiko, bpf, Networking,
	Alexei Starovoitov, Daniel Borkmann, Kernel Team
In-Reply-To: <20190727003045.63s6qau6kcnpkgxq@ast-mbp.dhcp.thefacebook.com>

On Fri, Jul 26, 2019 at 5:30 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Jul 26, 2019 at 02:47:28PM -0700, Andrii Nakryiko wrote:
> > On Fri, Jul 26, 2019 at 2:28 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 07/26, Andrii Nakryiko wrote:
> > > > libbpf_swap_print allows to restore previously set print function.
> > > > This is useful when running many independent test with one default print
> > > > function, but overriding log verbosity for particular subset of tests.
> > > Can we change the return type of libbpf_set_print instead and return
> > > the old function from it? Will it break ABI?
> >
> > Yeah, thought about that, but I wasn't sure about ABI breakage. It
> > seems like it shouldn't, so I'll just change libbpf_set_print
> > signature instead.
>
> I think it's ok to change return value of libbpf_set_print() from void
> to useful pointer.

Some googling gave inconclusive results. StackOverflow answers claim
it is compatible ABI change ([0]), but I also found some guidelines
for Android that designate any return type change as incompatible
([1]). [2] wasn't very helpful about defining compatibility rules,
unfortunately. I'm going with [0], though, and changing return type.

  [0] https://stackoverflow.com/questions/15626579/c-abi-is-changing-a-void-function-to-return-an-int-a-breaking-change
  [1] https://source.android.com/devices/architecture/vndk/abi-stability
  [2] https://www.cs.dartmouth.edu/~sergey/cs258/ABI/UlrichDrepper-How-To-Write-Shared-Libraries.pdf

> This function is not marked as __attribute__((__warn_unused_result__)),
> so there should be no abi issues.
>
> Please double check by compiler perf with different gcc-s as Arnaldo's setup does.
>

Compiled (make -C tools/perf) with GCC 4.8.5, GCC 7, and Clang 8. None
of them produced any warning, so I'm going forward with just return
type change.

^ permalink raw reply

* Re: [PATCH net-next 1/3] net: dsa: mt7530: Convert to PHYLINK API
From: Russell King - ARM Linux admin @ 2019-07-27 18:48 UTC (permalink / raw)
  To: René van Dorst
  Cc: netdev, frank-w, sean.wang, f.fainelli, davem, matthias.bgg,
	andrew, vivien.didelot, john, linux-mediatek, linux-mips, robh+dt,
	devicetree
In-Reply-To: <20190724192549.24615-2-opensource@vdorst.com>

Hi,

Just a couple of minor points.

On Wed, Jul 24, 2019 at 09:25:47PM +0200, René van Dorst wrote:
> +static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port,
> +				      unsigned int mode,
> +				      const struct phylink_link_state *state)
> +{
> +	struct mt7530_priv *priv = ds->priv;
> +	u32 mcr_cur, mcr_new;
> +
> +	switch (port) {
> +	case 0: /* Internal phy */
> +	case 1:
> +	case 2:
> +	case 3:
> +	case 4:
> +		if (state->interface != PHY_INTERFACE_MODE_GMII)
> +			return;
> +		break;
> +	/* case 5: Port 5 is not supported! */
> +	case 6: /* 1st cpu port */
> +		if (state->interface != PHY_INTERFACE_MODE_RGMII &&
> +		    state->interface != PHY_INTERFACE_MODE_TRGMII)
> +			return;
> +
> +		if (priv->p6_interface == state->interface)
> +			break;
> +		/* Setup TX circuit incluing relevant PAD and driving */
> +		mt7530_pad_clk_setup(ds, state->interface);
> +
> +		if (priv->id == ID_MT7530) {
> +			/* Setup RX circuit, relevant PAD and driving on the
> +			 * host which must be placed after the setup on the
> +			 * device side is all finished.
> +			 */
> +			mt7623_pad_clk_setup(ds);
> +		}
> +		priv->p6_interface = state->interface;
> +		break;
> +	default:
> +		dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port);
> +		return;
> +	}

	if (phylink_autoneg_inband(mode)) {
		dev_err(ds->dev, "%s: in-band negotiation unsupported\n",
			__func__);
		return;
	}

or similar, since you don't support inband AN in this code path.

> +
> +	mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port));
> +	mcr_new = mcr_cur;
> +	mcr_new &= ~(PMCR_FORCE_SPEED_1000 | PMCR_FORCE_SPEED_100 |
> +		     PMCR_FORCE_FDX | PMCR_TX_FC_EN | PMCR_RX_FC_EN);
> +	mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN |
> +		   PMCR_BACKPR_EN | PMCR_FORCE_MODE | PMCR_FORCE_LNK;
> +
> +	switch (state->speed) {
> +	case SPEED_1000:
> +		mcr_new |= PMCR_FORCE_SPEED_1000;
> +		break;
> +	case SPEED_100:
> +		mcr_new |= PMCR_FORCE_SPEED_100;
> +		break;
> +	}
> +	if (state->duplex == DUPLEX_FULL) {
> +		mcr_new |= PMCR_FORCE_FDX;
> +		if (state->pause & MLO_PAUSE_TX)
> +			mcr_new |= PMCR_TX_FC_EN;
> +		if (state->pause & MLO_PAUSE_RX)
> +			mcr_new |= PMCR_RX_FC_EN;
> +	}
> +
> +	if (mcr_new != mcr_cur)
> +		mt7530_write(priv, MT7530_PMCR_P(port), mcr_new);
> +}
> +
> +static void mt7530_phylink_mac_link_down(struct dsa_switch *ds, int port,
> +					 unsigned int mode,
> +					 phy_interface_t interface)
> +{
> +	struct mt7530_priv *priv = ds->priv;
> +
> +	mt7530_port_set_status(priv, port, 0);
> +}
> +
> +static void mt7530_phylink_mac_link_up(struct dsa_switch *ds, int port,
> +				       unsigned int mode,
> +				       phy_interface_t interface,
> +				       struct phy_device *phydev)
> +{
> +	struct mt7530_priv *priv = ds->priv;
> +
> +	mt7530_port_set_status(priv, port, 1);
> +}
> +
> +static void mt7530_phylink_validate(struct dsa_switch *ds, int port,
> +				    unsigned long *supported,
> +				    struct phylink_link_state *state)
> +{
> +	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
> +
> +	switch (port) {
> +	case 0: /* Internal phy */
> +	case 1:
> +	case 2:
> +	case 3:
> +	case 4:
> +		if (state->interface != PHY_INTERFACE_MODE_NA &&
> +		    state->interface != PHY_INTERFACE_MODE_GMII)
> +			goto unsupported;
> +		break;
> +	/* case 5: Port 5 not supported! */
> +	case 6: /* 1st cpu port */
> +		if (state->interface != PHY_INTERFACE_MODE_NA &&
> +		    state->interface != PHY_INTERFACE_MODE_RGMII &&
> +		    state->interface != PHY_INTERFACE_MODE_TRGMII)
> +			goto unsupported;
> +		break;
> +	default:
> +		linkmode_zero(supported);
> +		dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port);

You could reorder this as:

	default:
		dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port);
	unsupported:
		linkmode_zero(supported);

and save having the "unsupported" at the end of the function.  Not sure
what DaveM would think of it though.


-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

^ permalink raw reply

* Re: [PATCH net-next 3/3] net: dsa: mt7530: Add support for port 5
From: René van Dorst @ 2019-07-27 18:43 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, frank-w, sean.wang, f.fainelli, linux, matthias.bgg,
	andrew, vivien.didelot, john, linux-mediatek, linux-mips, robh+dt,
	devicetree
In-Reply-To: <20190726.140420.688330328284393964.davem@davemloft.net>

Quoting David Miller <davem@davemloft.net>:

> From: René van Dorst <opensource@vdorst.com>
> Date: Wed, 24 Jul 2019 21:25:49 +0200
>
>> @@ -1167,6 +1236,10 @@ mt7530_setup(struct dsa_switch *ds)
>>  	u32 id, val;
>>  	struct device_node *dn;
>>  	struct mt7530_dummy_poll p;
>> +	phy_interface_t interface;
>> +	struct device_node *mac_np;
>> +	struct device_node *phy_node;
>> +	const __be32 *_id;
>

Hi David,

> Reverse christmas tree here please.
>
> Thank you.

OK, I shall change that.
I spin a new version.

Greats,

René





^ permalink raw reply

* Re: [PATCH net-next 3/3] net: dsa: mt7530: Add support for port 5
From: René van Dorst @ 2019-07-27 18:38 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev, frank-w, sean.wang, linux, davem, matthias.bgg, andrew,
	vivien.didelot, john, linux-mediatek, linux-mips, robh+dt,
	devicetree
In-Reply-To: <f4a9e219-cd03-1512-874d-925c43e3c44f@gmail.com>

Quoting Florian Fainelli <f.fainelli@gmail.com>:

> On 7/24/2019 9:25 PM, René van Dorst wrote:
>> Adding support for port 5.
>>
>> Port 5 can muxed/interface to:
>> - internal 5th GMAC of the switch; can be used as 2nd CPU port or as
>>   extra port with an external phy for a 6th ethernet port.
>> - internal PHY of port 0 or 4; Used in most applications so that port 0
>>   or 4 is the WAN port and interfaces with the 2nd GMAC of the SOC.
>>
>> Signed-off-by: René van Dorst <opensource@vdorst.com>
>
> [snip]
>
>> +	/* Setup port 5 */
>> +	priv->p5_intf_sel = P5_DISABLED;
>> +	interface = PHY_INTERFACE_MODE_NA;
>> +
>> +	if (!dsa_is_unused_port(ds, 5)) {
>> +		priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
>> +		interface = of_get_phy_mode(ds->ports[5].dn);
>> +	} else {
>> +		/* Scan the ethernet nodes. Look for GMAC1, Lookup used phy */
>> +		for_each_child_of_node(dn, mac_np) {
>> +			if (!of_device_is_compatible(mac_np,
>> +						     "mediatek,eth-mac"))
>> +				continue;
>> +			_id = of_get_property(mac_np, "reg", NULL);
>> +			if (be32_to_cpup(_id)  != 1)
>> +				continue;
>> +
>> +			interface = of_get_phy_mode(mac_np);
>> +			phy_node = of_parse_phandle(mac_np, "phy-handle", 0);
>> +
>> +			if (phy_node->parent == priv->dev->of_node->parent) {
>> +				_id = of_get_property(phy_node, "reg", NULL);
>> +				id = be32_to_cpup(_id);
>> +				if (id == 0)
>> +					priv->p5_intf_sel = P5_INTF_SEL_PHY_P0;
>> +				if (id == 4)
>> +					priv->p5_intf_sel = P5_INTF_SEL_PHY_P4;
>

Hi Florian,

> Can you use of_mdio_parse_addr() here?

Yes that function be used.

Thanks for mention this function.

I see that I can refactor this scan routine a bit more.
Also I missing a of_node_put(phy_node) at the end.

> --
> Florian

Greats,

René




^ permalink raw reply

* Re: [PATCH bpf-next 02/10] libbpf: implement BPF CO-RE offset relocation algorithm
From: Andrii Nakryiko @ 2019-07-27 18:24 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, Networking,
	Daniel Borkmann, Yonghong Song, Kernel Team
In-Reply-To: <957fff81-d845-ebc9-0e80-dbb1f1736b40@fb.com>

On Sat, Jul 27, 2019 at 10:00 AM Alexei Starovoitov <ast@fb.com> wrote:
>
> On 7/26/19 11:25 PM, Andrii Nakryiko wrote:
> >>> +     } else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) {
> >>> +             if (insn->imm != orig_off)
> >>> +                     return -EINVAL;
> >>> +             insn->imm = new_off;
> >>> +             pr_debug("prog '%s': patched insn #%d (ST | MEM) imm %d -> %d\n",
> >>> +                      bpf_program__title(prog, false),
> >>> +                      insn_idx, orig_off, new_off);
> >> I'm pretty sure llvm was not capable of emitting BPF_ST insn.
> >> When did that change?
> > I just looked at possible instructions that could have 32-bit
> > immediate value. This is `*(rX) = offsetof(struct s, field)`, which I
> > though is conceivable. Do you think I should drop it?
>
> Just trying to point out that since it's not emitted by llvm
> this code is likely untested ?
> Or you've created a bpf asm test for this?


Yeah, it's untested right now. Let me try to come up with LLVM
assembly + relocation (not yet sure how/whether builtin works with
inline assembly), if that works out, I'll leave this, if not, I'll
drop BPF_ST|BPF_MEM part.

>
>

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Song Liu @ 2019-07-27 18:20 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Kees Cook, linux-security@vger.kernel.org,
	Networking, bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
	Lorenz Bauer, Jann Horn, Greg KH, Linux API
In-Reply-To: <7F51F8B8-CF4C-4D82-AAE1-F0F28951DB7F@fb.com>

Hi Andy, 

>>>> 
>>> 
>>> Well, yes. sys_bpf() is pretty powerful. 
>>> 
>>> The goal of /dev/bpf is to enable special users to call sys_bpf(). In 
>>> the meanwhile, such users should not take down the whole system easily
>>> by accident, e.g., with rm -rf /.
>> 
>> That’s easy, though — bpftool could learn to read /etc/bpfusers before allowing ruid != 0.
> 
> This is a great idea! fscaps + /etc/bpfusers should do the trick. 

After some discussions and more thinking on this, I have some concerns 
with the user space only approach.  

IIUC, your proposal for user space only approach is like: 

1. bpftool (and other tools) check /etc/bpfusers and only do 
   setuid for allowed users:

	int main()
	{
		if (/* uid in /etc/bpfusers */)
			setuid(0);
		sys_bpf(...);
	}

2. bpftool (and other tools) is installed with CAP_SETUID:

	setcap cap_setuid=e+p /bin/bpftool

3. sys admin maintains proper /etc/bpfusers. 

This approach is not ideal, because we need to trust the tool to give 
it CAP_SETUID. A hacked tool could easily bypass /etc/bpfusers check
or use other root only sys calls after setuid(0). 

Does this make sense? (Or did I misunderstand anything?)

Thanks,
Song


^ permalink raw reply

* Re: [PATCH bpf-next 2/6] bpf: add BPF_MAP_DUMP command to dump more than one entry per call
From: Yonghong Song @ 2019-07-27 17:54 UTC (permalink / raw)
  To: Brian Vazquez
  Cc: Alexei Starovoitov, Song Liu, Brian Vazquez, Daniel Borkmann,
	David S . Miller, Stanislav Fomichev, Willem de Bruijn,
	Petar Penkov, Networking, bpf
In-Reply-To: <CABCgpaVB+iDGO132d9CTtC_GYiKJuuL6pe5_Krm3-THgvfMO=A@mail.gmail.com>



On 7/26/19 4:36 PM, Brian Vazquez wrote:
> On Thu, Jul 25, 2019 at 11:10 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 7/25/19 6:47 PM, Alexei Starovoitov wrote:
>>> On Thu, Jul 25, 2019 at 6:24 PM Brian Vazquez <brianvv.kernel@gmail.com> wrote:
>>>>
>>>> On Thu, Jul 25, 2019 at 4:54 PM Alexei Starovoitov
>>>> <alexei.starovoitov@gmail.com> wrote:
>>>>>
>>>>> On Thu, Jul 25, 2019 at 04:25:53PM -0700, Brian Vazquez wrote:
>>>>>>>>> If prev_key is deleted before map_get_next_key(), we get the first key
>>>>>>>>> again. This is pretty weird.
>>>>>>>>
>>>>>>>> Yes, I know. But note that the current scenario happens even for the
>>>>>>>> old interface (imagine you are walking a map from userspace and you
>>>>>>>> tried get_next_key the prev_key was removed, you will start again from
>>>>>>>> the beginning without noticing it).
>>>>>>>> I tried to sent a patch in the past but I was missing some context:
>>>>>>>> before NULL was used to get the very first_key the interface relied in
>>>>>>>> a random (non existent) key to retrieve the first_key in the map, and
>>>>>>>> I was told what we still have to support that scenario.
>>>>>>>
>>>>>>> BPF_MAP_DUMP is slightly different, as you may return the first key
>>>>>>> multiple times in the same call. Also, BPF_MAP_DUMP is new, so we
>>>>>>> don't have to support legacy scenarios.
>>>>>>>
>>>>>>> Since BPF_MAP_DUMP keeps a list of elements. It is possible to try
>>>>>>> to look up previous keys. Would something down this direction work?
>>>>>>
>>>>>> I've been thinking about it and I think first we need a way to detect
>>>>>> that since key was not present we got the first_key instead:
>>>>>>
>>>>>> - One solution I had in mind was to explicitly asked for the first key
>>>>>> with map_get_next_key(map, NULL, first_key) and while walking the map
>>>>>> check that map_get_next_key(map, prev_key, key) doesn't return the
>>>>>> same key. This could be done using memcmp.
>>>>>> - Discussing with Stan, he mentioned that another option is to support
>>>>>> a flag in map_get_next_key to let it know that we want an error
>>>>>> instead of the first_key.
>>>>>>
>>>>>> After detecting the problem we also need to define what we want to do,
>>>>>> here some options:
>>>>>>
>>>>>> a) Return the error to the caller
>>>>>> b) Try with previous keys if any (which be limited to the keys that we
>>>>>> have traversed so far in this dump call)
>>>>>> c) continue with next entries in the map. array is easy just get the
>>>>>> next valid key (starting on i+1), but hmap might be difficult since
>>>>>> starting on the next bucket could potentially skip some keys that were
>>>>>> concurrently added to the same bucket where key used to be, and
>>>>>> starting on the same bucket could lead us to return repeated elements.
>>>>>>
>>>>>> Or maybe we could support those 3 cases via flags and let the caller
>>>>>> decide which one to use?
>>>>>
>>>>> this type of indecision is the reason why I wasn't excited about
>>>>> batch dumping in the first place and gave 'soft yes' when Stan
>>>>> mentioned it during lsf/mm/bpf uconf.
>>>>> We probably shouldn't do it.
>>>>> It feels this map_dump makes api more complex and doesn't really
>>>>> give much benefit to the user other than large map dump becomes faster.
>>>>> I think we gotta solve this problem differently.
>>>>
>>>> Some users are working around the dumping problems with the existing
>>>> api by creating a bpf_map_get_next_key_and_delete userspace function
>>>> (see https://urldefense.proofpoint.com/v2/url?u=https-3A__www.bouncybouncy.net_blog_bpf-5Fmap-5Fget-5Fnext-5Fkey-2Dpitfalls_&d=DwIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=DA8e1B5r073vIqRrFz7MRA&m=XvNxqsDhRi62gzZ04HbLRTOFJX8X6mTuK7PZGn80akY&s=7q7beZxOJJ3Q0el8L0r-xDctedSpnEejJ6PVX1XYotQ&e= )
>>>> which in my opinion is actually a good idea. The only problem with
>>>> that is that calling bpf_map_get_next_key(fd, key, next_key) and then
>>>> bpf_map_delete_elem(fd, key) from userspace is racing with kernel code
>>>> and it might lose some information when deleting.
>>>> We could then do map_dump_and_delete using that idea but in the kernel
>>>> where we could better handle the racing condition. In that scenario
>>>> even if we retrieve the same key it will contain different info ( the
>>>> delta between old and new value). Would that work?
>>>
>>> you mean get_next+lookup+delete at once?
>>> Sounds useful.
>>> Yonghong has been thinking about batching api as well.
>>
>> In bcc, we have many instances like this:
>>      getting all (key value) pairs, do some analysis and output,
>>      delete all keys
>>
>> The implementation typically like
>>      /* to get all (key, value) pairs */
>>      while(bpf_get_next_key() == 0)
>>        bpf_map_lookup()
>>      /* do analysis and output */
>>      for (all keys)
>>        bpf_map_delete()
> 
> If you do that in a map that is being modified while you are doing the
> analysis and output, you will lose some new data by deleting the keys,
> right?

Agreed, it is possible that if the same keys are reused to generate data 
during analysis and output period, we will miss them by deleting them.
 From that perspective, your above approach
       while (bpf_get_next_key())
           bpf_map_delete(prev_key)
           bpf_map_lookup()
           reset prev_keey
should provide a better alternative.

> 
>> get_next+lookup+delete will be definitely useful.
>> batching will be even better to save the number of syscalls.
>>
>> An alternative is to do batch get_next+lookup and batch delete
>> to achieve similar goal as the above code.
> 
> What I mentioned above is what it makes me think that with the
> deletion it'd be better if we perform these 3 operations at once:
> get_next+lookup+delete in a jumbo/atomic command and batch them later?

Agree. This is indeed the one most useful for bcc use case as well.

> 
>>
>> There is a minor difference between this approach
>> and the above get_next+lookup+delete.
>> During scanning the hash map, get_next+lookup may get less number
>> of elements compared to get_next+lookup+delete as the latter
>> may have more later-inserted hash elements after the operation
>> start. But both are inaccurate, so probably the difference
>> is minor.
>>
>>>
>>> I think if we cannot figure out how to make a batch of two commands
>>> get_next + lookup to work correctly then we need to identify/invent one
>>> command and make batching more generic.
>>
>> not 100% sure. It will be hard to define what is "correctly".
> 
> I agree, it'll be hard to define what is the right behavior.
> 
>> For not changing map, looping of (get_next, lookup) and batch
>> get_next+lookup should have the same results.
> 
> This is true for the api I'm presenting the only think that I was
> missing was what to do for changing maps to avoid the weird scenario
> (getting the first key due a concurrent deletion). And, in my opinion
> the way to go should be what also Willem supported: return the err to
> the caller and restart the dumping. I could do this with existing code
> just by detecting that we do provide a prev_key and got the first_key
> instead of the next_key or even implement a new function if you want
> to.

Always starting from the first key has its drawback as we keep getting 
the new elements if they are constantly populated. This may skew
the results for a large hash table.

Maybe we can just do lookup+delete or batch lookup+delete?
user gives NULL means the first key to lookup/delete.
Every (batch) lookup+delete will deletes one or a set of keys.
The set of keys are retrieved using internal get_next .
The (batch) lookup+delete will return next available key, which
user can be used for next (batch) lookup+delete.
If user provided key does not match, user can provide a flag
to go to the first key, or return an error.


> 
>> For constant changing loops, not sure how to define which one
>> is correct. If users have concerns, they may need to just pick one
>> which gives them more comfort.
>>
>>> Like make one jumbo/compound/atomic command to be get_next+lookup+delete.
>>> Define the semantics of this single compound command.
>>> And then let batching to be a multiplier of such command.
>>> In a sense that multiplier 1 or N should be have the same way.
>>> No extra flags to alter the batching.
>>> The high level description of the batch would be:
>>> pls execute get_next,lookup,delete and repeat it N times.
>>> or
>>> pls execute get_next,lookup and repeat N times.
> 
> But any attempt to do get_next+lookup will have same problem with
> deletions right?
> 
> I don't see how we could do it more consistent than what I'm
> proposing. Let's just support one case: report an error if the
> prev_key was not found instead of retrieving the first_key. Would that
> work?
> 
>>> where each command action is defined to be composable.
>>>
>>> Just a rough idea.
>>>

^ permalink raw reply

* [PATCH net] mlxsw: spectrum_ptp: Increase parsing depth when PTP is enabled
From: Ido Schimmel @ 2019-07-27 17:35 UTC (permalink / raw)
  To: netdev; +Cc: davem, jiri, petrm, mlxsw, Ido Schimmel

From: Petr Machata <petrm@mellanox.com>

Spectrum systems have a configurable limit on how far into the packet they
parse. By default, the limit is 96 bytes.

An IPv6 PTP packet is layered as Ethernet/IPv6/UDP (14+40+8 bytes), and
sequence ID of a PTP event is only available 32 bytes into payload, for a
total of 94 bytes. When an additional 802.1q header is present as
well (such as when ptp4l is running on a VLAN port), the parsing limit is
exceeded. Such packets are not recognized as PTP, and are not timestamped.

Therefore generalize the current VXLAN-specific parsing depth setting to
allow reference-counted requests from other modules as well. Keep it in the
VXLAN module, because the MPRS register also configures UDP destination
port number used for VXLAN, and is thus closely tied to the VXLAN code
anyway.

Then invoke the new interfaces from both VXLAN (in obvious places), as well
as from PTP code, when the (global) timestamping configuration changes from
disabled to enabled or vice versa.

Fixes: 8748642751ed ("mlxsw: spectrum: PTP: Support SIOCGHWTSTAMP, SIOCSHWTSTAMP ioctls")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  4 +
 .../ethernet/mellanox/mlxsw/spectrum_nve.c    |  1 +
 .../ethernet/mellanox/mlxsw/spectrum_nve.h    |  1 +
 .../mellanox/mlxsw/spectrum_nve_vxlan.c       | 76 ++++++++++++++-----
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c    | 17 +++++
 5 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 131f62ce9297..6664119fb0c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -951,4 +951,8 @@ void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp);
 
+/* spectrum_nve_vxlan.c */
+int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
index 1df164a4b06d..17f334b46c40 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -775,6 +775,7 @@ static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp)
 		ops->fini(nve);
 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
 				   nve->tunnel_index);
+		memset(&nve->config, 0, sizeof(nve->config));
 	}
 	nve->num_nve_tunnels--;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
index 0035640156a1..12f664f42f21 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -29,6 +29,7 @@ struct mlxsw_sp_nve {
 	unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
 	u32 tunnel_index;
 	u16 ul_rif_index;	/* Reserved for Spectrum */
+	unsigned int inc_parsing_depth_refs;
 };
 
 struct mlxsw_sp_nve_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index 93ccd9fc2266..05517c7feaa5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -103,9 +103,9 @@ static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
 	config->udp_dport = cfg->dst_port;
 }
 
-static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
-				    unsigned int parsing_depth,
-				    __be16 udp_dport)
+static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+				      unsigned int parsing_depth,
+				      __be16 udp_dport)
 {
 	char mprs_pl[MLXSW_REG_MPRS_LEN];
 
@@ -113,6 +113,56 @@ static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
 }
 
+static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+				    __be16 udp_dport)
+{
+	int parsing_depth = mlxsw_sp->nve->inc_parsing_depth_refs ?
+				MLXSW_SP_NVE_VXLAN_PARSING_DEPTH :
+				MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH;
+
+	return __mlxsw_sp_nve_parsing_set(mlxsw_sp, parsing_depth, udp_dport);
+}
+
+static int
+__mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp,
+				     __be16 udp_dport)
+{
+	int err;
+
+	mlxsw_sp->nve->inc_parsing_depth_refs++;
+
+	err = mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
+	if (err)
+		goto err_nve_parsing_set;
+	return 0;
+
+err_nve_parsing_set:
+	mlxsw_sp->nve->inc_parsing_depth_refs--;
+	return err;
+}
+
+static void
+__mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp,
+				     __be16 udp_dport)
+{
+	mlxsw_sp->nve->inc_parsing_depth_refs--;
+	mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
+}
+
+int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp)
+{
+	__be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
+
+	return __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, udp_dport);
+}
+
+void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp)
+{
+	__be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
+
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, udp_dport);
+}
+
 static void
 mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
 				  const struct mlxsw_sp_nve_config *config)
@@ -176,9 +226,7 @@ static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
 	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
 	int err;
 
-	err = mlxsw_sp_nve_parsing_set(mlxsw_sp,
-				       MLXSW_SP_NVE_VXLAN_PARSING_DEPTH,
-				       config->udp_dport);
+	err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
 	if (err)
 		return err;
 
@@ -203,8 +251,7 @@ static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
 err_rtdp_set:
 	mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
 err_config_set:
-	mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
-				 config->udp_dport);
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
 	return err;
 }
 
@@ -216,8 +263,7 @@ static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
 	mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
 					 config->ul_proto, &config->ul_sip);
 	mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
-	mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
-				 config->udp_dport);
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
 }
 
 static int
@@ -320,9 +366,7 @@ static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
 	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
 	int err;
 
-	err = mlxsw_sp_nve_parsing_set(mlxsw_sp,
-				       MLXSW_SP_NVE_VXLAN_PARSING_DEPTH,
-				       config->udp_dport);
+	err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
 	if (err)
 		return err;
 
@@ -348,8 +392,7 @@ static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
 err_rtdp_set:
 	mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
 err_config_set:
-	mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
-				 config->udp_dport);
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
 	return err;
 }
 
@@ -361,8 +404,7 @@ static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
 	mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
 					 config->ul_proto, &config->ul_sip);
 	mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
-	mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
-				 config->udp_dport);
+	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
 }
 
 const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index bd9c2bc2d5d6..4b352a71f76e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -979,19 +979,36 @@ static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_port *tmp;
+	u16 orig_ing_types = 0;
+	u16 orig_egr_types = 0;
 	int i;
+	int err;
 
 	/* MTPPPC configures timestamping globally, not per port. Find the
 	 * configuration that contains all configured timestamping requests.
 	 */
 	for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
 		tmp = mlxsw_sp->ports[i];
+		if (tmp) {
+			orig_ing_types |= tmp->ptp.ing_types;
+			orig_egr_types |= tmp->ptp.egr_types;
+		}
 		if (tmp && tmp != mlxsw_sp_port) {
 			ing_types |= tmp->ptp.ing_types;
 			egr_types |= tmp->ptp.egr_types;
 		}
 	}
 
+	if ((ing_types || egr_types) && !(orig_egr_types || orig_egr_types)) {
+		err = mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp);
+		if (err) {
+			netdev_err(mlxsw_sp_port->dev, "Failed to increase parsing depth");
+			return err;
+		}
+	}
+	if (!(ing_types || egr_types) && (orig_egr_types || orig_egr_types))
+		mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp);
+
 	return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp,
 				       ing_types, egr_types);
 }
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 3/3] mlxsw: spectrum_flower: Forbid to offload match on reserved TCP flags bits
From: Ido Schimmel @ 2019-07-27 17:32 UTC (permalink / raw)
  To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20190727173257.6848-1-idosch@idosch.org>

From: Jiri Pirko <jiri@mellanox.com>

Matching on reserved TCP flags bits is only supported using custom
parser. Since the usecase for that is not known now, just forbid to
offload rules that match on these bits.

Reported-by: Alex Kushnarov <alexanderk@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index c86d582dafbe..0ad1a24abfc6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -267,6 +267,12 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
 
 	flow_rule_match_tcp(rule, &match);
 
+	if (match.mask->flags & htons(0x0E00)) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "TCP flags match not supported on reserved bits");
+		dev_err(mlxsw_sp->bus_info->dev, "TCP flags match not supported on reserved bits\n");
+		return -EINVAL;
+	}
+
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS,
 				       ntohs(match.key->flags),
 				       ntohs(match.mask->flags));
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 0/3] mlxsw: spectrum_acl: Forbid unsupported filters
From: Ido Schimmel @ 2019-07-27 17:32 UTC (permalink / raw)
  To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel

From: Ido Schimmel <idosch@mellanox.com>

Patches #1-#2 make mlxsw reject unsupported egress filters. These
include filters that match on VLAN and filters associated with a
redirect action. Patch #1 rejects such filters when they are configured
on egress and patch #2 rejects such filters when they are configured in
a shared block that user tries to bind to egress.

Patch #3 forbids matching on reserved TCP flags as this is not supported
by the current keys that mlxsw uses.

Jiri Pirko (3):
  mlxsw: spectrum_flower: Forbid to offload mirred redirect on egress
  mlxsw: spectrum_acl: Track rules that forbid egress block bind
  mlxsw: spectrum_flower: Forbid to offload match on reserved TCP flags
    bits

 .../net/ethernet/mellanox/mlxsw/spectrum.c    |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  7 ++++--
 .../ethernet/mellanox/mlxsw/spectrum_acl.c    | 17 ++++++++++----
 .../ethernet/mellanox/mlxsw/spectrum_flower.c | 22 +++++++++++++++++++
 4 files changed, 41 insertions(+), 7 deletions(-)

-- 
2.21.0


^ permalink raw reply

* [PATCH net-next 2/3] mlxsw: spectrum_acl: Track rules that forbid egress block bind
From: Ido Schimmel @ 2019-07-27 17:32 UTC (permalink / raw)
  To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20190727173257.6848-1-idosch@idosch.org>

From: Jiri Pirko <jiri@mellanox.com>

Some matches and actions are not supported on egress. Track such rules
and forbid a bind of block which contains them to egress.

With this patch, the kernel tells the user he cannot do that:
$ tc qdisc add dev ens16np1 ingress_block 22 clsact
$ tc filter add block 22 protocol 802.1q pref 2 handle 101 flower vlan_id 100 skip_sw action pass
$ tc qdisc add dev ens16np2 egress_block 22 clsact
Error: mlxsw_spectrum: Block cannot be bound to egress because it contains unsupported rules.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c  |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h  |  7 +++++--
 .../net/ethernet/mellanox/mlxsw/spectrum_acl.c  | 17 +++++++++++++----
 .../ethernet/mellanox/mlxsw/spectrum_flower.c   | 11 +++++++++++
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 7e8a54068d92..9277b3f125e8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1625,7 +1625,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 	flow_block_cb_incref(block_cb);
 	err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block,
-				      mlxsw_sp_port, ingress);
+				      mlxsw_sp_port, ingress, f->extack);
 	if (err)
 		goto err_block_bind;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 131f62ce9297..c78d93afbb9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -623,7 +623,8 @@ struct mlxsw_sp_acl_rule_info {
 	unsigned int priority;
 	struct mlxsw_afk_element_values values;
 	struct mlxsw_afa_block *act_block;
-	u8 action_created:1;
+	u8 action_created:1,
+	   egress_bind_blocker:1;
 	unsigned int counter_index;
 };
 
@@ -642,6 +643,7 @@ struct mlxsw_sp_acl_block {
 	struct mlxsw_sp *mlxsw_sp;
 	unsigned int rule_count;
 	unsigned int disable_count;
+	unsigned int egress_blocker_rule_count;
 	struct net *net;
 };
 
@@ -657,7 +659,8 @@ void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block);
 int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
 			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress);
+			    bool ingress,
+			    struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_acl_block *block,
 			      struct mlxsw_sp_port *mlxsw_sp_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index e8ac90564dbe..1aaab8446270 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -239,7 +239,8 @@ mlxsw_sp_acl_block_lookup(struct mlxsw_sp_acl_block *block,
 int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
 			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress)
+			    bool ingress,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_acl_block_binding *binding;
 	int err;
@@ -247,6 +248,11 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
 	if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
 		return -EEXIST;
 
+	if (!ingress && block->egress_blocker_rule_count) {
+		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
+		return -EOPNOTSUPP;
+	}
+
 	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
 	if (!binding)
 		return -ENOMEM;
@@ -672,6 +678,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
 	int err;
 
 	err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
@@ -689,14 +696,14 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
 		 * one, to be directly bound to device. The rest of the
 		 * rulesets are bound by "Goto action set".
 		 */
-		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
-						      ruleset->ht_key.block);
+		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
 		if (err)
 			goto err_ruleset_block_bind;
 	}
 
 	list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
-	ruleset->ht_key.block->rule_count++;
+	block->rule_count++;
+	block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker;
 	return 0;
 
 err_ruleset_block_bind:
@@ -712,7 +719,9 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
 
+	block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
 	ruleset->ht_key.block->rule_count--;
 	list_del(&rule->list);
 	if (!ruleset->ht_key.chain_index &&
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 1eeac8a36ead..c86d582dafbe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -83,6 +83,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return -EOPNOTSUPP;
 			}
 
+			/* Forbid block with this rulei to be bound
+			 * to egress in future.
+			 */
+			rulei->egress_bind_blocker = 1;
+
 			fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp);
 			fid_index = mlxsw_sp_fid_index(fid);
 			err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei,
@@ -395,6 +400,12 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 			NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress");
 			return -EOPNOTSUPP;
 		}
+
+		/* Forbid block with this rulei to be bound
+		 * to egress in future.
+		 */
+		rulei->egress_bind_blocker = 1;
+
 		if (match.mask->vlan_id != 0)
 			mlxsw_sp_acl_rulei_keymask_u32(rulei,
 						       MLXSW_AFK_ELEMENT_VID,
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 1/3] mlxsw: spectrum_flower: Forbid to offload mirred redirect on egress
From: Ido Schimmel @ 2019-07-27 17:32 UTC (permalink / raw)
  To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20190727173257.6848-1-idosch@idosch.org>

From: Jiri Pirko <jiri@mellanox.com>

Spectrum ASIC does not support redirection on egress, so refuse to
insert such flows:

$ tc qdisc add dev ens16np1 clsact
$ tc filter add dev ens16np1 egress protocol all pref 1 handle 101 flower skip_sw action mirred egress redirect dev ens16np2
Error: mlxsw_spectrum: Redirect action is not supported on egress.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 202e9a246019..1eeac8a36ead 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -78,6 +78,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 			struct mlxsw_sp_fid *fid;
 			u16 fid_index;
 
+			if (mlxsw_sp_acl_block_is_egress_bound(block)) {
+				NL_SET_ERR_MSG_MOD(extack, "Redirect action is not supported on egress");
+				return -EOPNOTSUPP;
+			}
+
 			fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp);
 			fid_index = mlxsw_sp_fid_index(fid);
 			err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei,
-- 
2.21.0


^ permalink raw reply related

* Re: next-20190723: bpf/seccomp - systemd/journald issue?
From: Yonghong Song @ 2019-07-27 17:11 UTC (permalink / raw)
  To: sedat.dilek@gmail.com, Alexei Starovoitov
  Cc: Alexei Starovoitov, Daniel Borkmann, Martin Lau, Song Liu,
	netdev@vger.kernel.org, bpf@vger.kernel.org, Clang-Built-Linux ML,
	Kees Cook, Nick Desaulniers, Nathan Chancellor
In-Reply-To: <CA+icZUWVf6AK3bxfWBZ7iM1QTyk_G-4+1_LyK0jkoBDkDzvx4Q@mail.gmail.com>



On 7/27/19 1:16 AM, Sedat Dilek wrote:
> On Sat, Jul 27, 2019 at 9:36 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>>
>> On Sat, Jul 27, 2019 at 4:24 AM Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>>>
>>> On Fri, Jul 26, 2019 at 2:19 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>>>>
>>>> On Fri, Jul 26, 2019 at 11:10 PM Yonghong Song <yhs@fb.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 7/26/19 2:02 PM, Sedat Dilek wrote:
>>>>>> On Fri, Jul 26, 2019 at 10:38 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>>>>>>>
>>>>>>> Hi Yonghong Song,
>>>>>>>
>>>>>>> On Fri, Jul 26, 2019 at 5:45 PM Yonghong Song <yhs@fb.com> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 7/26/19 1:26 AM, Sedat Dilek wrote:
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> I have opened a new issue in the ClangBuiltLinux issue tracker.
>>>>>>>>
>>>>>>>> Glad to know clang 9 has asm goto support and now It can compile
>>>>>>>> kernel again.
>>>>>>>>
>>>>>>>
>>>>>>> Yupp.
>>>>>>>
>>>>>>>>>
>>>>>>>>> I am seeing a problem in the area bpf/seccomp causing
>>>>>>>>> systemd/journald/udevd services to fail.
>>>>>>>>>
>>>>>>>>> [Fri Jul 26 08:08:43 2019] systemd[453]: systemd-udevd.service: Failed
>>>>>>>>> to connect stdout to the journal socket, ignoring: Connection refused
>>>>>>>>>
>>>>>>>>> This happens when I use the (LLVM) LLD ld.lld-9 linker but not with
>>>>>>>>> BFD linker ld.bfd on Debian/buster AMD64.
>>>>>>>>> In both cases I use clang-9 (prerelease).
>>>>>>>>
>>>>>>>> Looks like it is a lld bug.
>>>>>>>>
>>>>>>>> I see the stack trace has __bpf_prog_run32() which is used by
>>>>>>>> kernel bpf interpreter. Could you try to enable bpf jit
>>>>>>>>      sysctl net.core.bpf_jit_enable = 1
>>>>>>>> If this passed, it will prove it is interpreter related.
>>>>>>>>
>>>>>>>
>>>>>>> After...
>>>>>>>
>>>>>>> sysctl -w net.core.bpf_jit_enable=1
>>>>>>>
>>>>>>> I can start all failed systemd services.
>>>>>>>
>>>>>>> systemd-journald.service
>>>>>>> systemd-udevd.service
>>>>>>> haveged.service
>>>>>>>
>>>>>>> This is in maintenance mode.
>>>>>>>
>>>>>>> What is next: Do set a permanent sysctl setting for net.core.bpf_jit_enable?
>>>>>>>
>>>>>>
>>>>>> This is what I did:
>>>>>
>>>>> I probably won't have cycles to debug this potential lld issue.
>>>>> Maybe you already did, I suggest you put enough reproducible
>>>>> details in the bug you filed against lld so they can take a look.
>>>>>
>>>>
>>>> I understand and will put the journalctl-log into the CBL issue
>>>> tracker and update informations.
>>>>
>>>> Thanks for your help understanding the BPF correlations.
>>>>
>>>> Is setting 'net.core.bpf_jit_enable = 2' helpful here?
>>>
>>> jit_enable=1 is enough.
>>> Or use CONFIG_BPF_JIT_ALWAYS_ON to workaround.
>>>
>>> It sounds like clang miscompiles interpreter.
> 
> Just to clarify:
> This does not happen with clang-9 + ld.bfd (GNU/ld linker).
> 
>>> modprobe test_bpf
>>> should be able to point out which part of interpreter is broken.
>>
>> Maybe we need something like...
>>
>> "bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()"
>>
>> ...for clang?
>>
> 
> Not sure if something like GCC's...
> 
> -fgcse
> 
> Perform a global common subexpression elimination pass. This pass also
> performs global constant and copy propagation.
> 
> Note: When compiling a program using computed gotos, a GCC extension,
> you may get better run-time performance if you disable the global
> common subexpression elimination pass by adding -fno-gcse to the
> command line.
> 
> Enabled at levels -O2, -O3, -Os.
> 
> ...is available for clang.
> 
> I tried with hopping to turn off "global common subexpression elimination":
> 
> diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
> index 383c87300b0d..92f934a1e9ff 100644
> --- a/arch/x86/net/Makefile
> +++ b/arch/x86/net/Makefile
> @@ -3,6 +3,8 @@
>   # Arch-specific network modules
>   #
> 
> +KBUILD_CFLAGS += -O0

This won't work. First, you added to the wrong file. The interpreter
is at kernel/bpf/core.c.

Second, kernel may have compilation issues with -O0.

> +
>   ifeq ($(CONFIG_X86_32),y)
>           obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
>   else
> 
> Still see...
> BROKEN: test_bpf: #294 BPF_MAXINSNS: Jump, gap, jump, ... jited:0
> 
> - Sedat -
> 

^ permalink raw reply

* Re: next-20190723: bpf/seccomp - systemd/journald issue?
From: Yonghong Song @ 2019-07-27 17:08 UTC (permalink / raw)
  To: sedat.dilek@gmail.com, Alexei Starovoitov
  Cc: Alexei Starovoitov, Daniel Borkmann, Martin Lau, Song Liu,
	netdev@vger.kernel.org, bpf@vger.kernel.org, Clang-Built-Linux ML,
	Kees Cook, Nick Desaulniers, Nathan Chancellor
In-Reply-To: <CA+icZUXGPCgdJzxTO+8W0EzNLZEQ88J_wusp7fPfEkNE2RoXJA@mail.gmail.com>



On 7/27/19 12:36 AM, Sedat Dilek wrote:
> On Sat, Jul 27, 2019 at 4:24 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
>>
>> On Fri, Jul 26, 2019 at 2:19 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>>>
>>> On Fri, Jul 26, 2019 at 11:10 PM Yonghong Song <yhs@fb.com> wrote:
>>>>
>>>>
>>>>
>>>> On 7/26/19 2:02 PM, Sedat Dilek wrote:
>>>>> On Fri, Jul 26, 2019 at 10:38 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>>>>>>
>>>>>> Hi Yonghong Song,
>>>>>>
>>>>>> On Fri, Jul 26, 2019 at 5:45 PM Yonghong Song <yhs@fb.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 7/26/19 1:26 AM, Sedat Dilek wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> I have opened a new issue in the ClangBuiltLinux issue tracker.
>>>>>>>
>>>>>>> Glad to know clang 9 has asm goto support and now It can compile
>>>>>>> kernel again.
>>>>>>>
>>>>>>
>>>>>> Yupp.
>>>>>>
>>>>>>>>
>>>>>>>> I am seeing a problem in the area bpf/seccomp causing
>>>>>>>> systemd/journald/udevd services to fail.
>>>>>>>>
>>>>>>>> [Fri Jul 26 08:08:43 2019] systemd[453]: systemd-udevd.service: Failed
>>>>>>>> to connect stdout to the journal socket, ignoring: Connection refused
>>>>>>>>
>>>>>>>> This happens when I use the (LLVM) LLD ld.lld-9 linker but not with
>>>>>>>> BFD linker ld.bfd on Debian/buster AMD64.
>>>>>>>> In both cases I use clang-9 (prerelease).
>>>>>>>
>>>>>>> Looks like it is a lld bug.
>>>>>>>
>>>>>>> I see the stack trace has __bpf_prog_run32() which is used by
>>>>>>> kernel bpf interpreter. Could you try to enable bpf jit
>>>>>>>      sysctl net.core.bpf_jit_enable = 1
>>>>>>> If this passed, it will prove it is interpreter related.
>>>>>>>
>>>>>>
>>>>>> After...
>>>>>>
>>>>>> sysctl -w net.core.bpf_jit_enable=1
>>>>>>
>>>>>> I can start all failed systemd services.
>>>>>>
>>>>>> systemd-journald.service
>>>>>> systemd-udevd.service
>>>>>> haveged.service
>>>>>>
>>>>>> This is in maintenance mode.
>>>>>>
>>>>>> What is next: Do set a permanent sysctl setting for net.core.bpf_jit_enable?
>>>>>>
>>>>>
>>>>> This is what I did:
>>>>
>>>> I probably won't have cycles to debug this potential lld issue.
>>>> Maybe you already did, I suggest you put enough reproducible
>>>> details in the bug you filed against lld so they can take a look.
>>>>
>>>
>>> I understand and will put the journalctl-log into the CBL issue
>>> tracker and update informations.
>>>
>>> Thanks for your help understanding the BPF correlations.
>>>
>>> Is setting 'net.core.bpf_jit_enable = 2' helpful here?
>>
>> jit_enable=1 is enough.
>> Or use CONFIG_BPF_JIT_ALWAYS_ON to workaround.
>>
>> It sounds like clang miscompiles interpreter.
>> modprobe test_bpf
>> should be able to point out which part of interpreter is broken.
> 
> Maybe we need something like...
> 
> "bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()"
> 
> ...for clang?

Not sure how do you get conclusion it is gcse causing the problem.
But anyway, adding such flag in the kernel is not a good idea.
clang/llvm should be fixed instead. Esp. there is still time
for 9.0.0 release to fix bugs.

> 
> - Sedat -
> 
> [1] https://git.kernel.org/linus/3193c0836f203a91bef96d88c64cccf0be090d9c
> 

^ permalink raw reply

* Re: [PATCH 1/2] ipmr: Make cache queue length configurable
From: Stephen Suryaputra @ 2019-07-27 17:03 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: Brodie Greenfield, David Miller, Stephen Hemminger, kuznet,
	yoshfuji, netdev, linux-kernel, chris.packham, luuk.paulussen
In-Reply-To: <6e8c51a0-cd34-e14a-7661-6fa5945f278b@cumulusnetworks.com>

On Fri, Jul 26, 2019 at 7:18 AM Nikolay Aleksandrov
<nikolay@cumulusnetworks.com> wrote:

> > You've said it yourself - it has linear traversal time, but doesn't this patch allow any netns on the
> > system to increase its limit to any value, thus possibly affecting others ?
> > Though the socket limit will kick in at some point. I think that's where David
> > was going with his suggestion back in 2018:
> > https://www.spinics.net/lists/netdev/msg514543.html
> >
> > If we add this sysctl now, we'll be stuck with it. I'd prefer David's suggestion
> > so we can rely only on the receive queue queue limit which is already configurable.
> > We still need to be careful with the defaults though, the NOCACHE entry is 128 bytes
> > and with the skb overhead currently on my setup we end up at about 277 entries default limit.
>
> I mean that people might be surprised if they increased that limit by default, that's the
> only problem I'm not sure how to handle. Maybe we need some hard limit anyway.
> Have you done any tests what value works for your setup ?

FYI: for ours, it is 2048.

^ permalink raw reply

* Re: [PATCH bpf-next 02/10] libbpf: implement BPF CO-RE offset relocation algorithm
From: Alexei Starovoitov @ 2019-07-27 17:00 UTC (permalink / raw)
  To: Andrii Nakryiko, Alexei Starovoitov
  Cc: Andrii Nakryiko, bpf, Networking, Daniel Borkmann, Yonghong Song,
	Kernel Team
In-Reply-To: <CAEf4BzZxPgAh4PGSWyD0tPOd1wh=DGZuSe1fzxc-Sgyk4D5vDg@mail.gmail.com>

On 7/26/19 11:25 PM, Andrii Nakryiko wrote:
>>> +     } else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) {
>>> +             if (insn->imm != orig_off)
>>> +                     return -EINVAL;
>>> +             insn->imm = new_off;
>>> +             pr_debug("prog '%s': patched insn #%d (ST | MEM) imm %d -> %d\n",
>>> +                      bpf_program__title(prog, false),
>>> +                      insn_idx, orig_off, new_off);
>> I'm pretty sure llvm was not capable of emitting BPF_ST insn.
>> When did that change?
> I just looked at possible instructions that could have 32-bit
> immediate value. This is `*(rX) = offsetof(struct s, field)`, which I
> though is conceivable. Do you think I should drop it?

Just trying to point out that since it's not emitted by llvm
this code is likely untested ?
Or you've created a bpf asm test for this?



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox