Netdev List
 help / color / mirror / Atom feed
* [PATCH btf 1/3] btf: extract BTF type size calculation
From: Andrii Nakryiko @ 2019-01-31  6:58 UTC (permalink / raw)
  To: netdev, ast, yhs, kafai, acme, daniel, kernel-team; +Cc: Andrii Nakryiko
In-Reply-To: <20190131065837.3380288-1-andriin@fb.com>

This pre-patch extracts calculation of amount of space taken by BTF type descriptor
for later reuse by btf_dedup functionality.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/btf.c | 98 +++++++++++++++++++++------------------------
 1 file changed, 46 insertions(+), 52 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d682d3b8f7b9..79b782963917 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -180,6 +180,37 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
 	return 0;
 }
 
+static int btf_type_size(struct btf_type *t, btf_print_fn_t err_log)
+{
+	int base_size = sizeof(struct btf_type);
+	__u16 vlen = BTF_INFO_VLEN(t->info);
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		return base_size;
+	case BTF_KIND_INT:
+		return base_size + sizeof(__u32);
+	case BTF_KIND_ENUM:
+		return base_size + vlen * sizeof(struct btf_enum);
+	case BTF_KIND_ARRAY:
+		return base_size + sizeof(struct btf_array);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		return base_size + vlen * sizeof(struct btf_member);
+	case BTF_KIND_FUNC_PROTO:
+		return base_size + vlen * sizeof(struct btf_param);
+	default:
+		elog("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info));
+		return -EINVAL;
+	}
+}
+
 static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
 {
 	struct btf_header *hdr = btf->hdr;
@@ -189,41 +220,13 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
 
 	while (next_type < end_type) {
 		struct btf_type *t = next_type;
-		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int type_size;
 		int err;
 
-		next_type += sizeof(*t);
-		switch (BTF_INFO_KIND(t->info)) {
-		case BTF_KIND_INT:
-			next_type += sizeof(int);
-			break;
-		case BTF_KIND_ARRAY:
-			next_type += sizeof(struct btf_array);
-			break;
-		case BTF_KIND_STRUCT:
-		case BTF_KIND_UNION:
-			next_type += vlen * sizeof(struct btf_member);
-			break;
-		case BTF_KIND_ENUM:
-			next_type += vlen * sizeof(struct btf_enum);
-			break;
-		case BTF_KIND_FUNC_PROTO:
-			next_type += vlen * sizeof(struct btf_param);
-			break;
-		case BTF_KIND_FUNC:
-		case BTF_KIND_TYPEDEF:
-		case BTF_KIND_PTR:
-		case BTF_KIND_FWD:
-		case BTF_KIND_VOLATILE:
-		case BTF_KIND_CONST:
-		case BTF_KIND_RESTRICT:
-			break;
-		default:
-			elog("Unsupported BTF_KIND:%u\n",
-			     BTF_INFO_KIND(t->info));
-			return -EINVAL;
-		}
-
+		type_size = btf_type_size(t, err_log);
+		if (type_size < 0)
+			return type_size;
+		next_type += type_size;
 		err = btf_add_type(btf, t);
 		if (err)
 			return err;
@@ -250,21 +253,6 @@ static bool btf_type_is_void_or_null(const struct btf_type *t)
 	return !t || btf_type_is_void(t);
 }
 
-static __s64 btf_type_size(const struct btf_type *t)
-{
-	switch (BTF_INFO_KIND(t->info)) {
-	case BTF_KIND_INT:
-	case BTF_KIND_STRUCT:
-	case BTF_KIND_UNION:
-	case BTF_KIND_ENUM:
-		return t->size;
-	case BTF_KIND_PTR:
-		return sizeof(void *);
-	default:
-		return -EINVAL;
-	}
-}
-
 #define MAX_RESOLVE_DEPTH 32
 
 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
@@ -278,11 +266,16 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 	t = btf__type_by_id(btf, type_id);
 	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
 	     i++) {
-		size = btf_type_size(t);
-		if (size >= 0)
-			break;
-
 		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_INT:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_ENUM:
+			size = t->size;
+			goto done;
+		case BTF_KIND_PTR:
+			size = sizeof(void *);
+			goto done;
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_VOLATILE:
 		case BTF_KIND_CONST:
@@ -306,6 +299,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 	if (size < 0)
 		return -EINVAL;
 
+done:
 	if (nelems && size > UINT32_MAX / nelems)
 		return -E2BIG;
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH btf 3/3] selftests/btf: add initial BTF dedup tests
From: Andrii Nakryiko @ 2019-01-31  6:58 UTC (permalink / raw)
  To: netdev, ast, yhs, kafai, acme, daniel, kernel-team; +Cc: Andrii Nakryiko
In-Reply-To: <20190131065837.3380288-1-andriin@fb.com>

This patch sets up a new kind of tests (BTF dedup tests) and tests few aspects of
BTF dedup algorithm. More complete set of tests will come in follow up patches.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/btf.c                    |  12 +
 tools/lib/bpf/btf.h                    |   3 +
 tools/lib/bpf/libbpf.map               |   2 +
 tools/testing/selftests/bpf/test_btf.c | 535 ++++++++++++++++++++++++-
 4 files changed, 537 insertions(+), 15 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 981f166b401c..abfcf80e8625 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -235,6 +235,11 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
 	return 0;
 }
 
+__u32 btf__get_nr_types(const struct btf *btf)
+{
+	return btf->nr_types;
+}
+
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
 	if (type_id > btf->nr_types)
@@ -426,6 +431,13 @@ int btf__fd(const struct btf *btf)
 	return btf->fd;
 }
 
+void btf__get_strings(const struct btf *btf, const char **strings,
+		      __u32 *str_len)
+{
+	*strings = btf->strings;
+	*str_len = btf->hdr->str_len;
+}
+
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 {
 	if (offset < btf->hdr->str_len)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 0c1d739fd4e8..afffe30e84ae 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -62,11 +62,14 @@ LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 				   const char *type_name);
+LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 						  __u32 id);
 LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings,
+				 __u32 *str_len);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 42524133779b..9b4a1f1bd3bf 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -112,6 +112,8 @@ LIBBPF_0.0.1 {
 		btf__find_by_name;
 		btf__free;
 		btf__get_from_id;
+		btf__get_nr_types;
+		btf__get_strings;
 		btf__name_by_offset;
 		btf__new;
 		btf__resolve_size;
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 179f1d8ec5bf..dcc770245815 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -78,12 +78,21 @@ static int __base_pr(const char *format, ...)
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz),	\
 	BTF_INT_ENC(encoding, bits_offset, bits)
 
+#define BTF_FWD_ENC(name, kind_flag) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0)
+
 #define BTF_ARRAY_ENC(type, index_type, nr_elems)	\
 	(type), (index_type), (nr_elems)
 #define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
 	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
 	BTF_ARRAY_ENC(type, index_type, nr_elems)
 
+#define BTF_STRUCT_ENC(name, nr_elems, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz)
+
+#define BTF_UNION_ENC(name, nr_elems, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz)
+
 #define BTF_MEMBER_ENC(name, type, bits_offset)	\
 	(name), (type), (bits_offset)
 #define BTF_ENUM_ENC(name, val) (name), (val)
@@ -99,6 +108,12 @@ static int __base_pr(const char *format, ...)
 #define BTF_CONST_ENC(type) \
 	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type)
 
+#define BTF_VOLATILE_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type)
+
+#define BTF_RESTRICT_ENC(type) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type)
+
 #define BTF_FUNC_PROTO_ENC(ret_type, nargs) \
 	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type)
 
@@ -111,6 +126,10 @@ static int __base_pr(const char *format, ...)
 #define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
 
+#define NAME_NTH(N) (0xffff0000 | N)
+#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000)
+#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff)
+
 #define MAX_NR_RAW_U32 1024
 #define BTF_LOG_BUF_SIZE 65535
 
@@ -119,12 +138,14 @@ static struct args {
 	unsigned int file_test_num;
 	unsigned int get_info_test_num;
 	unsigned int info_raw_test_num;
+	unsigned int dedup_test_num;
 	bool raw_test;
 	bool file_test;
 	bool get_info_test;
 	bool pprint_test;
 	bool always_log;
 	bool info_raw_test;
+	bool dedup_test;
 } args;
 
 static char btf_log_buf[BTF_LOG_BUF_SIZE];
@@ -2835,11 +2856,13 @@ static void *btf_raw_create(const struct btf_header *hdr,
 			    const char **ret_next_str)
 {
 	const char *next_str = str, *end_str = str + str_sec_size;
+	const char **strs_idx = NULL, **tmp_strs_idx;
+	int strs_cap = 0, strs_cnt = 0, next_str_idx = 0;
 	unsigned int size_needed, offset;
 	struct btf_header *ret_hdr;
-	int i, type_sec_size;
+	int i, type_sec_size, err = 0;
 	uint32_t *ret_types;
-	void *raw_btf;
+	void *raw_btf = NULL;
 
 	type_sec_size = get_raw_sec_size(raw_types);
 	if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
@@ -2854,17 +2877,44 @@ static void *btf_raw_create(const struct btf_header *hdr,
 	memcpy(raw_btf, hdr, sizeof(*hdr));
 	offset = sizeof(*hdr);
 
+	/* Index strings */
+	while ((next_str = get_next_str(next_str, end_str))) {
+		if (strs_cnt == strs_cap) {
+			strs_cap += max(16, strs_cap / 2);
+			tmp_strs_idx = realloc(strs_idx,
+					       sizeof(*strs_idx) * strs_cap);
+			if (CHECK(!tmp_strs_idx,
+				  "Cannot allocate memory for strs_idx")) {
+				err = -1;
+				goto done;
+			}
+			strs_idx = tmp_strs_idx;
+		}
+		strs_idx[strs_cnt++] = next_str;
+		next_str += strlen(next_str);
+	}
+
 	/* Copy type section */
 	ret_types = raw_btf + offset;
 	for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
 		if (raw_types[i] == NAME_TBD) {
-			next_str = get_next_str(next_str, end_str);
-			if (CHECK(!next_str, "Error in getting next_str")) {
-				free(raw_btf);
-				return NULL;
+			if (CHECK(next_str_idx == strs_cnt,
+				  "Error in getting next_str #%d",
+				  next_str_idx)) {
+				err = -1;
+				goto done;
 			}
-			ret_types[i] = next_str - str;
-			next_str += strlen(next_str);
+			ret_types[i] = strs_idx[next_str_idx++] - str;
+		} else if (IS_NAME_NTH(raw_types[i])) {
+			int idx = GET_NAME_NTH_IDX(raw_types[i]);
+
+			if (CHECK(idx <= 0 || idx > strs_cnt,
+				  "Error getting string #%d, strs_cnt:%d",
+				  idx, strs_cnt)) {
+				err = -1;
+				goto done;
+			}
+			ret_types[i] = strs_idx[idx-1] - str;
 		} else {
 			ret_types[i] = raw_types[i];
 		}
@@ -2881,8 +2931,17 @@ static void *btf_raw_create(const struct btf_header *hdr,
 
 	*btf_size = size_needed;
 	if (ret_next_str)
-		*ret_next_str = next_str;
+		*ret_next_str =
+			next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL;
 
+done:
+	if (err) {
+		if (raw_btf)
+			free(raw_btf);
+		if (strs_idx)
+			free(strs_idx);
+		return NULL;
+	}
 	return raw_btf;
 }
 
@@ -5551,20 +5610,450 @@ static int test_info_raw(void)
 	return err;
 }
 
+struct btf_raw_data {
+	__u32 raw_types[MAX_NR_RAW_U32];
+	const char *str_sec;
+	__u32 str_sec_size;
+};
+
+struct btf_dedup_test {
+	const char *descr;
+	struct btf_raw_data input;
+	struct btf_raw_data expect;
+	struct btf_dedup_opts opts;
+};
+
+const struct btf_dedup_test dedup_tests[] = {
+
+{
+	.descr = "dedup: unused strings filtering",
+	.input = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 64, 8),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0unused\0int\0foo\0bar\0long"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0int\0long"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: strings deduplication",
+	.input = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8),
+			BTF_TYPE_INT_ENC(NAME_NTH(3), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 64, 8),
+			BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0int\0long int\0int\0long int\0int"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0int\0long int"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: struct example #1",
+	/*
+	 * struct s {
+	 *	struct s *next;
+	 *	const int *a;
+	 *	int b[16];
+	 *	int c;
+	 * }
+	 */
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* int[16] */
+			BTF_TYPE_ARRAY_ENC(1, 1, 16),					/* [2] */
+			/* struct s { */
+			BTF_STRUCT_ENC(NAME_NTH(2), 4, 84),				/* [3] */
+				BTF_MEMBER_ENC(NAME_NTH(3), 4, 0),	/* struct s *next;	*/
+				BTF_MEMBER_ENC(NAME_NTH(4), 5, 64),	/* const int *a;	*/
+				BTF_MEMBER_ENC(NAME_NTH(5), 2, 128),	/* int b[16];		*/
+				BTF_MEMBER_ENC(NAME_NTH(6), 1, 640),	/* int c;		*/
+			/* ptr -> [3] struct s */
+			BTF_PTR_ENC(3),							/* [4] */
+			/* ptr -> [6] const int */
+			BTF_PTR_ENC(6),							/* [5] */
+			/* const -> [1] int */
+			BTF_CONST_ENC(1),						/* [6] */
+
+			/* full copy of the above */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [7] */
+			BTF_TYPE_ARRAY_ENC(7, 7, 16),					/* [8] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 4, 84),				/* [9] */
+				BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
+				BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
+				BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
+				BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
+			BTF_PTR_ENC(9),							/* [10] */
+			BTF_PTR_ENC(12),						/* [11] */
+			BTF_CONST_ENC(7),						/* [12] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+	},
+	.expect = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* int[16] */
+			BTF_TYPE_ARRAY_ENC(1, 1, 16),					/* [2] */
+			/* struct s { */
+			BTF_STRUCT_ENC(NAME_NTH(6), 4, 84),				/* [3] */
+				BTF_MEMBER_ENC(NAME_NTH(5), 4, 0),	/* struct s *next;	*/
+				BTF_MEMBER_ENC(NAME_NTH(1), 5, 64),	/* const int *a;	*/
+				BTF_MEMBER_ENC(NAME_NTH(2), 2, 128),	/* int b[16];		*/
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 640),	/* int c;		*/
+			/* ptr -> [3] struct s */
+			BTF_PTR_ENC(3),							/* [4] */
+			/* ptr -> [6] const int */
+			BTF_PTR_ENC(6),							/* [5] */
+			/* const -> [1] int */
+			BTF_CONST_ENC(1),						/* [6] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: all possible kinds (no duplicates)",
+	.input = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8),		/* [1] int */
+			BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4),	/* [2] enum */
+				BTF_ENUM_ENC(NAME_TBD, 0),
+				BTF_ENUM_ENC(NAME_TBD, 1),
+			BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */),			/* [3] fwd */
+			BTF_TYPE_ARRAY_ENC(2, 1, 7),					/* [4] array */
+			BTF_STRUCT_ENC(NAME_TBD, 1, 4),					/* [5] struct */
+				BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+			BTF_UNION_ENC(NAME_TBD, 1, 4),					/* [6] union */
+				BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+			BTF_TYPEDEF_ENC(NAME_TBD, 1),					/* [7] typedef */
+			BTF_PTR_ENC(0),							/* [8] ptr */
+			BTF_CONST_ENC(8),						/* [9] const */
+			BTF_VOLATILE_ENC(8),						/* [10] volatile */
+			BTF_RESTRICT_ENC(8),						/* [11] restrict */
+			BTF_FUNC_PROTO_ENC(1, 2),					/* [12] func_proto */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8),		/* [1] int */
+			BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4),	/* [2] enum */
+				BTF_ENUM_ENC(NAME_TBD, 0),
+				BTF_ENUM_ENC(NAME_TBD, 1),
+			BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */),			/* [3] fwd */
+			BTF_TYPE_ARRAY_ENC(2, 1, 7),					/* [4] array */
+			BTF_STRUCT_ENC(NAME_TBD, 1, 4),					/* [5] struct */
+				BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+			BTF_UNION_ENC(NAME_TBD, 1, 4),					/* [6] union */
+				BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+			BTF_TYPEDEF_ENC(NAME_TBD, 1),					/* [7] typedef */
+			BTF_PTR_ENC(0),							/* [8] ptr */
+			BTF_CONST_ENC(8),						/* [9] const */
+			BTF_VOLATILE_ENC(8),						/* [10] volatile */
+			BTF_RESTRICT_ENC(8),						/* [11] restrict */
+			BTF_FUNC_PROTO_ENC(1, 2),					/* [12] func_proto */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: no int duplicates",
+	.input = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
+			/* different name */
+			BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8),
+			/* different encoding */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8),
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8),
+			/* different bit offset */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8),
+			/* different bit size */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
+			/* different byte size */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0int\0some other int"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
+			/* different name */
+			BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8),
+			/* different encoding */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8),
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8),
+			/* different bit offset */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8),
+			/* different bit size */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
+			/* different byte size */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0int\0some other int"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+
+};
+
+static int btf_type_size(const struct btf_type *t)
+{
+	int base_size = sizeof(struct btf_type);
+	__u16 vlen = BTF_INFO_VLEN(t->info);
+	__u16 kind = BTF_INFO_KIND(t->info);
+
+	switch (kind) {
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		return base_size;
+	case BTF_KIND_INT:
+		return base_size + sizeof(__u32);
+	case BTF_KIND_ENUM:
+		return base_size + vlen * sizeof(struct btf_enum);
+	case BTF_KIND_ARRAY:
+		return base_size + sizeof(struct btf_array);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		return base_size + vlen * sizeof(struct btf_member);
+	case BTF_KIND_FUNC_PROTO:
+		return base_size + vlen * sizeof(struct btf_param);
+	default:
+		fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
+		return -EINVAL;
+	}
+}
+
+static void dump_btf_strings(const char *strs, __u32 len)
+{
+	const char *cur = strs;
+	int i = 0;
+
+	while (cur < strs + len) {
+		fprintf(stderr, "string #%d: '%s'\n", i, cur);
+		cur += strlen(cur) + 1;
+		i++;
+	}
+}
+
+static int do_test_dedup(unsigned int test_num)
+{
+	const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
+	int err = 0, i;
+	__u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len;
+	void *raw_btf;
+	unsigned int raw_btf_size;
+	struct btf *test_btf = NULL, *expect_btf = NULL;
+	const char *ret_test_next_str, *ret_expect_next_str;
+	const char *test_strs, *expect_strs;
+	const char *test_str_cur, *test_str_end;
+	const char *expect_str_cur, *expect_str_end;
+
+	fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
+
+	raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types,
+				 test->input.str_sec, test->input.str_sec_size,
+				 &raw_btf_size, &ret_test_next_str);
+	if (!raw_btf)
+		return -1;
+	test_btf = btf__new((__u8 *)raw_btf, raw_btf_size, __base_pr);
+	free(raw_btf);
+	if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
+		  PTR_ERR(test_btf))) {
+		err = -1;
+		goto done;
+	}
+
+	raw_btf = btf_raw_create(&hdr_tmpl, test->expect.raw_types,
+				 test->expect.str_sec,
+				 test->expect.str_sec_size,
+				 &raw_btf_size, &ret_expect_next_str);
+	if (!raw_btf)
+		return -1;
+	expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size, __base_pr);
+	free(raw_btf);
+	if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
+		  PTR_ERR(expect_btf))) {
+		err = -1;
+		goto done;
+	}
+
+	err = btf__dedup(test_btf, NULL, &test->opts, __base_pr);
+	if (CHECK(err, "btf_dedup failed errno:%d", err)) {
+		err = -1;
+		goto done;
+	}
+
+	btf__get_strings(test_btf, &test_strs, &test_str_len);
+	btf__get_strings(expect_btf, &expect_strs, &expect_str_len);
+	if (CHECK(test_str_len != expect_str_len,
+		  "test_str_len:%u != expect_str_len:%u",
+		  test_str_len, expect_str_len)) {
+		fprintf(stderr, "\ntest strings:\n");
+		dump_btf_strings(test_strs, test_str_len);
+		fprintf(stderr, "\nexpected strings:\n");
+		dump_btf_strings(expect_strs, expect_str_len);
+		err = -1;
+		goto done;
+	}
+
+	test_str_cur = test_strs;
+	test_str_end = test_strs + test_str_len;
+	expect_str_cur = expect_strs;
+	expect_str_end = expect_strs + expect_str_len;
+	while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
+		size_t test_len, expect_len;
+
+		test_len = strlen(test_str_cur);
+		expect_len = strlen(expect_str_cur);
+		if (CHECK(test_len != expect_len,
+			  "test_len:%zu != expect_len:%zu "
+			  "(test_str:%s, expect_str:%s)",
+			  test_len, expect_len, test_str_cur, expect_str_cur)) {
+			err = -1;
+			goto done;
+		}
+		if (CHECK(strcmp(test_str_cur, expect_str_cur),
+			  "test_str:%s != expect_str:%s",
+			  test_str_cur, expect_str_cur)) {
+			err = -1;
+			goto done;
+		}
+		test_str_cur += test_len + 1;
+		expect_str_cur += expect_len + 1;
+	}
+	if (CHECK(test_str_cur != test_str_end,
+		  "test_str_cur:%p != test_str_end:%p",
+		  test_str_cur, test_str_end)) {
+		err = -1;
+		goto done;
+	}
+
+	test_nr_types = btf__get_nr_types(test_btf);
+	expect_nr_types = btf__get_nr_types(expect_btf);
+	if (CHECK(test_nr_types != expect_nr_types,
+		  "test_nr_types:%u != expect_nr_types:%u",
+		  test_nr_types, expect_nr_types)) {
+		err = -1;
+		goto done;
+	}
+
+	for (i = 1; i <= test_nr_types; i++) {
+		const struct btf_type *test_type, *expect_type;
+		int test_size, expect_size;
+
+		test_type = btf__type_by_id(test_btf, i);
+		expect_type = btf__type_by_id(expect_btf, i);
+		test_size = btf_type_size(test_type);
+		expect_size = btf_type_size(expect_type);
+
+		if (CHECK(test_size != expect_size,
+			  "type #%d: test_size:%d != expect_size:%u",
+			  i, test_size, expect_size)) {
+			err = -1;
+			goto done;
+		}
+		if (CHECK(memcmp((void *)test_type,
+				 (void *)expect_type,
+				 test_size),
+			  "type #%d: contents differ", i)) {
+			err = -1;
+			goto done;
+		}
+	}
+
+done:
+	if (!err)
+		fprintf(stderr, "OK");
+	if (!IS_ERR(test_btf))
+		btf__free(test_btf);
+	if (!IS_ERR(expect_btf))
+		btf__free(expect_btf);
+
+	return err;
+}
+
+static int test_dedup(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.dedup_test_num)
+		return count_result(do_test_dedup(args.dedup_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++)
+		err |= count_result(do_test_dedup(i));
+
+	return err;
+}
+
 static void usage(const char *cmd)
 {
 	fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n"
 			"\t[-g btf_get_info_test_num (1 - %zu)] |\n"
 			"\t[-f btf_file_test_num (1 - %zu)] |\n"
 			"\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n"
-			"\t[-p (pretty print test)]]\n",
+			"\t[-p (pretty print test)] |\n"
+			"\t[-d btf_dedup_test_num (1 - %zu)]]\n",
 		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
-		ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests));
+		ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests),
+		ARRAY_SIZE(dedup_tests));
 }
 
 static int parse_args(int argc, char **argv)
 {
-	const char *optstr = "lpk:f:r:g:";
+	const char *optstr = "hlpk:f:r:g:d:";
 	int opt;
 
 	while ((opt = getopt(argc, argv, optstr)) != -1) {
@@ -5591,12 +6080,16 @@ static int parse_args(int argc, char **argv)
 			args.info_raw_test_num = atoi(optarg);
 			args.info_raw_test = true;
 			break;
+		case 'd':
+			args.dedup_test_num = atoi(optarg);
+			args.dedup_test = true;
+			break;
 		case 'h':
 			usage(argv[0]);
 			exit(0);
 		default:
-				usage(argv[0]);
-				return -1;
+			usage(argv[0]);
+			return -1;
 		}
 	}
 
@@ -5632,6 +6125,14 @@ static int parse_args(int argc, char **argv)
 		return -1;
 	}
 
+	if (args.dedup_test_num &&
+	    (args.dedup_test_num < 1 ||
+	     args.dedup_test_num > ARRAY_SIZE(dedup_tests))) {
+		fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n",
+			ARRAY_SIZE(dedup_tests));
+		return -1;
+	}
+
 	return 0;
 }
 
@@ -5667,14 +6168,18 @@ int main(int argc, char **argv)
 	if (args.info_raw_test)
 		err |= test_info_raw();
 
+	if (args.dedup_test)
+		err |= test_dedup();
+
 	if (args.raw_test || args.get_info_test || args.file_test ||
-	    args.pprint_test || args.info_raw_test)
+	    args.pprint_test || args.info_raw_test || args.dedup_test)
 		goto done;
 
 	err |= test_raw();
 	err |= test_get_info();
 	err |= test_file();
 	err |= test_info_raw();
+	err |= test_dedup();
 
 done:
 	print_summary();
-- 
2.17.1


^ permalink raw reply related

* [PATCH btf 2/3] btf: add BTF types deduplication algorithm
From: Andrii Nakryiko @ 2019-01-31  6:58 UTC (permalink / raw)
  To: netdev, ast, yhs, kafai, acme, daniel, kernel-team; +Cc: Andrii Nakryiko
In-Reply-To: <20190131065837.3380288-1-andriin@fb.com>

This patch implements BTF types deduplication algorithm. It allows to
greatly compress typical output of pahole's DWARF-to-BTF conversion or
LLVM's compilation output by detecting and collapsing identical types emitted in
isolation per compilation unit. Algorithm also resolves struct/union forward
declarations into concrete BTF types representing referenced struct/union. If
undesired, this resolution can be disabled through specifying corresponding options.

Algorithm itself and its application to Linux kernel's BTF types is
described in details at:
https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/btf.c      | 1741 ++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h      |    8 +
 tools/lib/bpf/libbpf.map |    1 +
 3 files changed, 1750 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 79b782963917..981f166b401c 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -780,3 +780,1744 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
 {
 	return btf_ext->line_info.rec_size;
 }
+
+struct btf_dedup;
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts);
+static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_strings(struct btf_dedup *d);
+static int btf_dedup_prim_types(struct btf_dedup *d);
+static int btf_dedup_struct_types(struct btf_dedup *d);
+static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_compact_types(struct btf_dedup *d);
+static int btf_dedup_remap_types(struct btf_dedup *d);
+
+/*
+ * Deduplicate BTF types and strings.
+ *
+ * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
+ * section with all BTF type descriptors and string data. It overwrites that
+ * memory in-place with deduplicated types and strings without any loss of
+ * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
+ * is provided, all the strings referenced from .BTF.ext section are honored
+ * and updated to point to the right offsets after deduplication.
+ *
+ * If function returns with error, type/string data might be garbled and should
+ * be discarded.
+ *
+ * More verbose and detailed description of both problem btf_dedup is solving,
+ * as well as solution could be found at:
+ * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
+ *
+ * Problem description and justification
+ * =====================================
+ *
+ * BTF type information is typically emitted either as a result of conversion
+ * from DWARF to BTF or directly by compiler. In both cases, each compilation
+ * unit contains information about a subset of all the types that are used
+ * in an application. These subsets are frequently overlapping and contain a lot
+ * of duplicated information when later concatenated together into a single
+ * binary. This algorithm ensures that each unique type is represented by single
+ * BTF type descriptor, greatly reducing resulting size of BTF data.
+ *
+ * Compilation unit isolation and subsequent duplication of data is not the only
+ * problem. The same type hierarchy (e.g., struct and all the type that struct
+ * references) in different compilation units can be represented in BTF to
+ * various degrees of completeness (or, rather, incompleteness) due to
+ * struct/union forward declarations.
+ *
+ * Let's take a look at an example, that we'll use to better understand the
+ * problem (and solution). Suppose we have two compilation units, each using
+ * same `struct S`, but each of them having incomplete type information about
+ * struct's fields:
+ *
+ * // CU #1:
+ * struct S;
+ * struct A {
+ *	int a;
+ *	struct A* self;
+ *	struct S* parent;
+ * };
+ * struct B;
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * // CU #2:
+ * struct S;
+ * struct A;
+ * struct B {
+ *	int b;
+ *	struct B* self;
+ *	struct S* parent;
+ * };
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * In case of CU #1, BTF data will know only that `struct B` exist (but no
+ * more), but will know the complete type information about `struct A`. While
+ * for CU #2, it will know full type information about `struct B`, but will
+ * only know about forward declaration of `struct A` (in BTF terms, it will
+ * have `BTF_KIND_FWD` type descriptor with name `B`).
+ *
+ * This compilation unit isolation means that it's possible that there is no
+ * single CU with complete type information describing structs `S`, `A`, and
+ * `B`. Also, we might get tons of duplicated and redundant type information.
+ *
+ * Additional complication we need to keep in mind comes from the fact that
+ * types, in general, can form graphs containing cycles, not just DAGs.
+ *
+ * While algorithm does deduplication, it also merges and resolves type
+ * information (unless disabled throught `struct btf_opts`), whenever possible.
+ * E.g., in the example above with two compilation units having partial type
+ * information for structs `A` and `B`, the output of algorithm will emit
+ * a single copy of each BTF type that describes structs `A`, `B`, and `S`
+ * (as well as type information for `int` and pointers), as if they were defined
+ * in a single compilation unit as:
+ *
+ * struct A {
+ *	int a;
+ *	struct A* self;
+ *	struct S* parent;
+ * };
+ * struct B {
+ *	int b;
+ *	struct B* self;
+ *	struct S* parent;
+ * };
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * Algorithm summary
+ * =================
+ *
+ * Algorithm completes its work in 6 separate passes:
+ *
+ * 1. Strings deduplication.
+ * 2. Primitive types deduplication (int, enum, fwd).
+ * 3. Struct/union types deduplication.
+ * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ *    protos, and const/volatile/restrict modifiers).
+ * 5. Types compaction.
+ * 6. Types remapping.
+ *
+ * Algorithm determines canonical type descriptor, which is a single
+ * representative type for each truly unique type. This canonical type is the
+ * one that will go into final deduplicated BTF type information. For
+ * struct/unions, it is also the type that algorithm will merge additional type
+ * information into (while resolving FWDs), as it discovers it from data in
+ * other CUs. Each input BTF type eventually gets either mapped to itself, if
+ * that type is canonical, or to some other type, if that type is equivalent
+ * and was chosen as canonical representative. This mapping is stored in
+ * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
+ * FWD type got resolved to.
+ *
+ * To facilitate fast discovery of canonical types, we also maintain canonical
+ * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
+ * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
+ * that match that signature. With sufficiently good choice of type signature
+ * hashing function, we can limit number of canonical types for each unique type
+ * signature to a very small number, allowing to find canonical type for any
+ * duplicated type very quickly.
+ *
+ * Struct/union deduplication is the most critical part and algorithm for
+ * deduplicating structs/unions is described in greater details in comments for
+ * `btf_dedup_is_equiv` function.
+ */
+int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+	       const struct btf_dedup_opts *opts, btf_print_fn_t err_log)
+{
+	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+	int err;
+
+	if (IS_ERR(d)) {
+		elog("btf_dedup_new failed: %ld", PTR_ERR(d));
+		return -EINVAL;
+	}
+
+	err = btf_dedup_strings(d);
+	if (err < 0) {
+		elog("btf_dedup_strings failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_prim_types(d);
+	if (err < 0) {
+		elog("btf_dedup_prim_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_struct_types(d);
+	if (err < 0) {
+		elog("btf_dedup_struct_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_ref_types(d);
+	if (err < 0) {
+		elog("btf_dedup_ref_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_compact_types(d);
+	if (err < 0) {
+		elog("btf_dedup_compact_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_remap_types(d);
+	if (err < 0) {
+		elog("btf_dedup_remap_types failed:%d\n", err);
+		goto done;
+	}
+
+done:
+	btf_dedup_free(d);
+	return err;
+}
+
+#define BTF_DEDUP_TABLE_SIZE_LOG 14
+#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
+#define BTF_UNPROCESSED_ID ((__u32)-1)
+#define BTF_IN_PROGRESS_ID ((__u32)-2)
+
+struct btf_dedup_node {
+	struct btf_dedup_node *next;
+	__u32 type_id;
+};
+
+struct btf_dedup {
+	/* .BTF section to be deduped in-place */
+	struct btf *btf;
+	/*
+	 * Optional .BTF.ext section. When provided, any strings referenced
+	 * from it will be taken into account when deduping strings
+	 */
+	struct btf_ext *btf_ext;
+	/*
+	 * This is a map from any type's signature hash to a list of possible
+	 * canonical representative type candidates. Hash collisions are
+	 * ignored, so even types of various kinds can share same list of
+	 * candidates, which is fine because we rely on subsequent
+	 * btf_xxx_equal() checks to authoritatively verify type equality.
+	 */
+	struct btf_dedup_node **dedup_table;
+	/* Canonical types map */
+	__u32 *map;
+	/* Hypothetical mapping, used during type graph equivalence checks */
+	__u32 *hypot_map;
+	__u32 *hypot_list;
+	size_t hypot_cnt;
+	size_t hypot_cap;
+	/* Various option modifying behavior of algorithm */
+	struct btf_dedup_opts opts;
+};
+
+struct btf_str_ptr {
+	const char *str;
+	__u32 new_off;
+	bool used;
+};
+
+struct btf_str_ptrs {
+	struct btf_str_ptr *ptrs;
+	const char *data;
+	__u32 cnt;
+	__u32 cap;
+};
+
+static inline __u32 hash_combine(__u32 h, __u32 value)
+{
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define GOLDEN_RATIO_PRIME 0x9e370001UL
+	return h * 37 + value * GOLDEN_RATIO_PRIME;
+#undef GOLDEN_RATIO_PRIME
+}
+
+#define for_each_hash_node(table, hash, node) \
+	for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)
+
+static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
+{
+	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
+
+	if (!node)
+		return -ENOMEM;
+	node->type_id = type_id;
+	node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
+	d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
+	return 0;
+}
+
+static int btf_dedup_hypot_map_add(struct btf_dedup *d,
+				   __u32 from_id, __u32 to_id)
+{
+	if (d->hypot_cnt == d->hypot_cap) {
+		__u32 *new_list;
+
+		d->hypot_cap += max(16, d->hypot_cap / 2);
+		new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+		if (!new_list)
+			return -ENOMEM;
+		d->hypot_list = new_list;
+	}
+	d->hypot_list[d->hypot_cnt++] = from_id;
+	d->hypot_map[from_id] = to_id;
+	return 0;
+}
+
+static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
+{
+	int i;
+
+	for (i = 0; i < d->hypot_cnt; i++)
+		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
+	d->hypot_cnt = 0;
+}
+
+static void btf_dedup_table_free(struct btf_dedup *d)
+{
+	struct btf_dedup_node *head, *tmp;
+	int i;
+
+	if (!d->dedup_table)
+		return;
+
+	for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
+		while (d->dedup_table[i]) {
+			tmp = d->dedup_table[i];
+			d->dedup_table[i] = tmp->next;
+			free(tmp);
+		}
+
+		head = d->dedup_table[i];
+		while (head) {
+			tmp = head;
+			head = head->next;
+			free(tmp);
+		}
+	}
+
+	free(d->dedup_table);
+	d->dedup_table = NULL;
+}
+
+static void btf_dedup_free(struct btf_dedup *d)
+{
+	btf_dedup_table_free(d);
+
+	free(d->map);
+	d->map = NULL;
+
+	free(d->hypot_map);
+	d->hypot_map = NULL;
+
+	free(d->hypot_list);
+	d->hypot_list = NULL;
+
+	free(d);
+}
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts)
+{
+	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
+	int i, err = 0;
+
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+
+	d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
+				sizeof(struct btf_dedup_node *));
+	if (!d->dedup_table) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->map) {
+		err = -ENOMEM;
+		goto done;
+	}
+	/* special BTF "void" type is made canonical immediately */
+	d->map[0] = 0;
+	for (i = 1; i <= btf->nr_types; i++)
+		d->map[i] = BTF_UNPROCESSED_ID;
+
+	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->hypot_map) {
+		err = -ENOMEM;
+		goto done;
+	}
+	for (i = 0; i <= btf->nr_types; i++)
+		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+
+done:
+	if (err) {
+		btf_dedup_free(d);
+		return ERR_PTR(err);
+	}
+
+	return d;
+}
+
+typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
+
+/*
+ * Iterate over all possible places in .BTF and .BTF.ext that can reference
+ * string and pass pointer to it to a provided callback `fn`.
+ */
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+{
+	void *line_data_cur, *line_data_end;
+	int i, j, r, rec_size;
+	struct btf_type *t;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		t = d->btf->types[i];
+		r = fn(&t->name_off, ctx);
+		if (r)
+			return r;
+
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION: {
+			struct btf_member *m = (struct btf_member *)(t + 1);
+			__u16 vlen = BTF_INFO_VLEN(t->info);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		case BTF_KIND_ENUM: {
+			struct btf_enum *m = (struct btf_enum *)(t + 1);
+			__u16 vlen = BTF_INFO_VLEN(t->info);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			struct btf_param *m = (struct btf_param *)(t + 1);
+			__u16 vlen = BTF_INFO_VLEN(t->info);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		default:
+			break;
+		}
+	}
+
+	if (!d->btf_ext)
+		return 0;
+
+	line_data_cur = d->btf_ext->line_info.info;
+	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
+	rec_size = d->btf_ext->line_info.rec_size;
+
+	while (line_data_cur < line_data_end) {
+		struct btf_ext_info_sec *sec = line_data_cur;
+		struct bpf_line_info_min *line_info;
+		__u32 num_info = sec->num_info;
+
+		r = fn(&sec->sec_name_off, ctx);
+		if (r)
+			return r;
+
+		line_data_cur += sizeof(struct btf_ext_info_sec);
+		for (i = 0; i < num_info; i++) {
+			line_info = line_data_cur;
+			r = fn(&line_info->file_name_off, ctx);
+			if (r)
+				return r;
+			r = fn(&line_info->line_off, ctx);
+			if (r)
+				return r;
+			line_data_cur += rec_size;
+		}
+	}
+
+	return 0;
+}
+
+static int str_sort_by_content(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *p1 = a1;
+	const struct btf_str_ptr *p2 = a2;
+
+	return strcmp(p1->str, p2->str);
+}
+
+static int str_sort_by_offset(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *p1 = a1;
+	const struct btf_str_ptr *p2 = a2;
+
+	if (p1->str != p2->str)
+		return p1->str < p2->str ? -1 : 1;
+	return 0;
+}
+
+static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
+{
+	const struct btf_str_ptr *p = pelem;
+
+	if (str_ptr != p->str)
+		return (const char *)str_ptr < p->str ? -1 : 1;
+	return 0;
+}
+
+static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs;
+	struct btf_str_ptr *s;
+
+	if (*str_off_ptr == 0)
+		return 0;
+
+	strs = ctx;
+	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (!s)
+		return -EINVAL;
+	s->used = true;
+	return 0;
+}
+
+static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs;
+	struct btf_str_ptr *s;
+
+	if (*str_off_ptr == 0)
+		return 0;
+
+	strs = ctx;
+	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (!s)
+		return -EINVAL;
+	*str_off_ptr = s->new_off;
+	return 0;
+}
+
+/*
+ * Dedup string and filter out those that are not referenced from either .BTF
+ * or .BTF.ext (if provided) sections.
+ *
+ * This is done by building index of all strings in BTF's string section,
+ * then iterating over all entities that can reference strings (e.g., type
+ * names, struct field names, .BTF.ext line info, etc) and marking corresponding
+ * strings as used. After that all used strings are deduped and compacted into
+ * sequential blob of memory and new offsets are calculated. Then all the string
+ * references are iterated again and rewritten using new offsets.
+ */
+static int btf_dedup_strings(struct btf_dedup *d)
+{
+	const struct btf_header *hdr = d->btf->hdr;
+	char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+	char *end = start + d->btf->hdr->str_len;
+	char *p = start, *tmp_strs = NULL;
+	struct btf_str_ptrs strs = {
+		.cnt = 0,
+		.cap = 0,
+		.ptrs = NULL,
+		.data = start,
+	};
+	int i, j, err = 0, grp_idx;
+	bool grp_used;
+
+	/* build index of all strings */
+	while (p < end) {
+		if (strs.cnt + 1 > strs.cap) {
+			struct btf_str_ptr *new_ptrs;
+
+			strs.cap += max(strs.cnt / 2, 16);
+			new_ptrs = realloc(strs.ptrs,
+					   sizeof(strs.ptrs[0]) * strs.cap);
+			if (!new_ptrs) {
+				err = -ENOMEM;
+				goto done;
+			}
+			strs.ptrs = new_ptrs;
+		}
+
+		strs.ptrs[strs.cnt].str = p;
+		strs.ptrs[strs.cnt].used = false;
+
+		p += strlen(p) + 1;
+		strs.cnt++;
+	}
+
+	/* temporary storage for deduplicated strings */
+	tmp_strs = malloc(d->btf->hdr->str_len);
+	if (!tmp_strs) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* mark all used strings */
+	strs.ptrs[0].used = true;
+	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
+	if (err)
+		goto done;
+
+	/* sort strings by context, so that we can identify duplicates */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+
+	/*
+	 * iterate groups of equal strings and if any instance in a group was
+	 * referenced, emit single instance and remember new offset
+	 */
+	p = tmp_strs;
+	grp_idx = 0;
+	grp_used = strs.ptrs[0].used;
+	/* iterate past end to avoid code duplication after loop */
+	for (i = 1; i <= strs.cnt; i++) {
+		/*
+		 * when i == strs.cnt, we want to skip string comparison and go
+		 * straight to handling last group of strings (otherwise we'd
+		 * need to handle last group after the loop w/ duplicated code)
+		 */
+		if (i < strs.cnt &&
+		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
+			grp_used = grp_used || strs.ptrs[i].used;
+			continue;
+		}
+
+		/*
+		 * this check would have been required after the loop to handle
+		 * last group of strings, but due to <= condition in a loop
+		 * we avoid that duplication
+		 */
+		if (grp_used) {
+			int new_off = p - tmp_strs;
+			__u32 len = strlen(strs.ptrs[grp_idx].str);
+
+			memmove(p, strs.ptrs[grp_idx].str, len + 1);
+			for (j = grp_idx; j < i; j++)
+				strs.ptrs[j].new_off = new_off;
+			p += len + 1;
+		}
+
+		if (i < strs.cnt) {
+			grp_idx = i;
+			grp_used = strs.ptrs[i].used;
+		}
+	}
+
+	/* replace original strings with deduped ones */
+	d->btf->hdr->str_len = p - tmp_strs;
+	memmove(start, tmp_strs, d->btf->hdr->str_len);
+	end = start + d->btf->hdr->str_len;
+
+	/* restore original order for further binary search lookups */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+
+	/* remap string offsets */
+	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+	if (err)
+		goto done;
+
+	d->btf->hdr->str_len = end - start;
+
+done:
+	free(tmp_strs);
+	free(strs.ptrs);
+	return err;
+}
+
+static __u32 btf_hash_common(struct btf_type *t)
+{
+	__u32 h;
+
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info);
+	h = hash_combine(h, t->size);
+	return h;
+}
+
+static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
+{
+	return t1->name_off == t2->name_off &&
+	       t1->info == t2->info &&
+	       t1->size == t2->size;
+}
+
+/* Calculate type signature hash of INT. */
+static __u32 btf_hash_int(struct btf_type *t)
+{
+	__u32 info = *(__u32 *)(t + 1);
+	__u32 h;
+
+	h = btf_hash_common(t);
+	h = hash_combine(h, info);
+	return h;
+}
+
+/* Check structural equality of two INTs. */
+static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+{
+	__u32 info1, info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+	info1 = *(__u32 *)(t1 + 1);
+	info2 = *(__u32 *)(t2 + 1);
+	return info1 == info2;
+}
+
+/* Calculate type signature hash of ENUM. */
+static __u32 btf_hash_enum(struct btf_type *t)
+{
+	struct btf_enum *member = (struct btf_enum *)(t + 1);
+	__u32 vlen = BTF_INFO_VLEN(t->info);
+	__u32 h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->val);
+		member++;
+	}
+	return h;
+}
+
+/* Check structural equality of two ENUMs. */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_enum *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_enum *)(t1 + 1);
+	m2 = (struct btf_enum *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->val != m2->val)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static __u32 btf_hash_struct(struct btf_type *t)
+{
+	struct btf_member *member = (struct btf_member *)(t + 1);
+	__u32 vlen = BTF_INFO_VLEN(t->info);
+	__u32 h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->offset);
+		/* no hashing of referenced type ID, it can be unresolved yet */
+		member++;
+	}
+	return h;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_member *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_member *)(t1 + 1);
+	m2 = (struct btf_member *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->offset != m2->offset)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Calculate type signature hash of ARRAY, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static __u32 btf_hash_array(struct btf_type *t)
+{
+	struct btf_array *info = (struct btf_array *)(t + 1);
+	__u32 h = btf_hash_common(t);
+
+	h = hash_combine(h, info->type);
+	h = hash_combine(h, info->index_type);
+	h = hash_combine(h, info->nelems);
+	return h;
+}
+
+/*
+ * Check exact equality of two ARRAYs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * ARRAY to potential canonical representative.
+ */
+static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_array *info1, *info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	info1 = (struct btf_array *)(t1 + 1);
+	info2 = (struct btf_array *)(t2 + 1);
+	return info1->type == info2->type &&
+	       info1->index_type == info2->index_type &&
+	       info1->nelems == info2->nelems;
+}
+
+/*
+ * Check structural compatibility of two ARRAYs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_array *info1, *info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	info1 = (struct btf_array *)(t1 + 1);
+	info2 = (struct btf_array *)(t2 + 1);
+	return info1->nelems == info2->nelems;
+}
+
+/*
+ * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static inline __u32 btf_hash_fnproto(struct btf_type *t)
+{
+	struct btf_param *member = (struct btf_param *)(t + 1);
+	__u16 vlen = BTF_INFO_VLEN(t->info);
+	__u32 h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->type);
+		member++;
+	}
+	return h;
+}
+
+/*
+ * Check exact equality of two FUNC_PROTOs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * FUNC_PROTO to potential canonical representative.
+ */
+static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_param *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_param *)(t1 + 1);
+	m2 = (struct btf_param *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->type != m2->type)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_param *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	/* skip return type ID */
+	if (t1->name_off != t2->name_off || t1->info != t2->info)
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_param *)(t1 + 1);
+	m2 = (struct btf_param *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Deduplicate primitive types, that can't reference other types, by calculating
+ * their type signature hash and comparing them with any possible canonical
+ * candidate. If no canonical candidate matches, type itself is marked as
+ * canonical and is added into `btf_dedup->dedup_table` as another candidate.
+ */
+static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = d->btf->types[type_id];
+	struct btf_type *cand;
+	struct btf_dedup_node *cand_node;
+	/* if we don't find equivalent type, then we are canonical */
+	__u32 new_id = type_id;
+	__u32 h;
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_ARRAY:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_FUNC_PROTO:
+		return 0;
+
+	case BTF_KIND_INT:
+		h = btf_hash_int(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_int(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_ENUM:
+		h = btf_hash_enum(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_enum(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_FWD:
+		h = btf_hash_common(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_common(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btf_dedup_prim_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_prim_type(d, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Check whether type is already mapped into canonical one (could be to itself).
+ */
+static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
+{
+	return d->map[type_id] <= BTF_MAX_TYPE;
+}
+
+/*
+ * Resolve type ID into its canonical type ID, if any; otherwise return original
+ * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
+ * STRUCT/UNION link and resolve it into canonical type ID as well.
+ */
+static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
+{
+	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
+		type_id = d->map[type_id];
+	return type_id;
+}
+
+/*
+ * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
+ * type ID.
+ */
+static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
+{
+	__u32 orig_type_id = type_id;
+
+	if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD)
+		return type_id;
+
+	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
+		type_id = d->map[type_id];
+
+	if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD)
+		return type_id;
+
+	return orig_type_id;
+}
+
+
+static inline __u16 btf_fwd_kind(struct btf_type *t)
+{
+	return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
+}
+
+/*
+ * Check equivalence of BTF type graph formed by candidate struct/union (we'll
+ * call it "candidate graph" in this description for brevity) to a type graph
+ * formed by (potential) canonical struct/union ("canonical graph" for brevity
+ * here, though keep in mind that not all types in canonical graph are
+ * necessarily canonical representatives themselves, some of them might be
+ * duplicates or its uniqueness might not have been established yet).
+ * Returns:
+ *  - >0, if type graphs are equivalent;
+ *  -  0, if not equivalent;
+ *  - <0, on error.
+ *
+ * Algorithm performs side-by-side DFS traversal of both type graphs and checks
+ * equivalence of BTF types at each step. If at any point BTF types in candidate
+ * and canonical graphs are not compatible structurally, whole graphs are
+ * incompatible. If types are structurally equivalent (i.e., all information
+ * except referenced type IDs is exactly the same), a mapping from `canon_id` to
+ * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`).
+ * If a type references other types, then those referenced types are checked
+ * for equivalence recursively.
+ *
+ * During DFS traversal, if we find that for current `canon_id` type we
+ * already have some mapping in hypothetical map, we check for two possible
+ * situations:
+ *   - `canon_id` is mapped to exactly the same type as `cand_id`. This will
+ *     happen when type graphs have cycles. In this case we assume those two
+ *     types are equivalent.
+ *   - `canon_id` is mapped to different type. This is contradiction in our
+ *     hypothetical mapping, because same graph in canonical graph corresponds
+ *     to two different types in candidate graph, which for equivalent type
+ *     graphs shouldn't happen. This condition terminates equivalence check
+ *     with negative result.
+ *
+ * If type graphs traversal exhausts types to check and find no contradiction,
+ * then type graphs are equivalent.
+ *
+ * When checking types for equivalence, there is one special case: FWD types.
+ * If FWD type resolution is allowed and one of the types (either from canonical
+ * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
+ * flag) and their names match, hypothetical mapping is updated to point from
+ * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
+ * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
+ *
+ * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
+ * if there are two exactly named (or anonymous) structs/unions that are
+ * compatible structurally, one of which has FWD field, while other is concrete
+ * STRUCT/UNION, but according to C sources they are different structs/unions
+ * that are referencing different types with the same name. This is extremely
+ * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
+ * this logic is causing problems.
+ *
+ * Doing FWD resolution means that both candidate and/or canonical graphs can
+ * consists of portions of the graph that come from multiple compilation units.
+ * This is due to the fact that types within single compilation unit are always
+ * deduplicated and FWDs are already resolved, if referenced struct/union
+ * definiton is available. So, if we had unresolved FWD and found corresponding
+ * STRUCT/UNION, they will be from different compilation units. This
+ * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
+ * type graph will likely have at least two different BTF types that describe
+ * same type (e.g., most probably there will be two different BTF types for the
+ * same 'int' primitive type) and could even have "overlapping" parts of type
+ * graph that describe same subset of types.
+ *
+ * This in turn means that our assumption that each type in canonical graph
+ * must correspond to exactly one type in candidate graph might not hold
+ * anymore and will make it harder to detect contradictions using hypothetical
+ * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
+ * resolution only in canonical graph. FWDs in candidate graphs are never
+ * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
+ * that can occur:
+ *   - Both types in canonical and candidate graphs are FWDs. If they are
+ *     structurally equivalent, then they can either be both resolved to the
+ *     same STRUCT/UNION or not resolved at all. In both cases they are
+ *     equivalent and there is no need to resolve FWD on candidate side.
+ *   - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
+ *     so nothing to resolve as well, algorithm will check equivalence anyway.
+ *   - Type in canonical graph is FWD, while type in candidate is concrete
+ *     STRUCT/UNION. In this case candidate graph comes from single compilation
+ *     unit, so there is exactly one BTF type for each unique C type. After
+ *     resolving FWD into STRUCT/UNION, there might be more than one BTF type
+ *     in canonical graph mapping to single BTF type in candidate graph, but
+ *     because hypothetical mapping maps from canonical to candidate types, it's
+ *     alright, and we still maintain the property of having single `canon_id`
+ *     mapping to single `cand_id` (there could be two different `canon_id`
+ *     mapped to the same `cand_id`, but it's not contradictory).
+ *   - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
+ *     graph is FWD. In this case we are just going to check compatibility of
+ *     STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
+ *     assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
+ *     a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
+ *     turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
+ *     canonical graph.
+ */
+static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
+			      __u32 canon_id)
+{
+	struct btf_type *cand_type;
+	struct btf_type *canon_type;
+	__u32 hypot_type_id;
+	__u16 cand_kind;
+	__u16 canon_kind;
+	int i, eq;
+
+	/* if both resolve to the same canonical, they must be equivalent */
+	if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
+		return 1;
+
+	canon_id = resolve_fwd_id(d, canon_id);
+
+	hypot_type_id = d->hypot_map[canon_id];
+	if (hypot_type_id <= BTF_MAX_TYPE)
+		return hypot_type_id == cand_id;
+
+	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
+		return -ENOMEM;
+
+	cand_type = d->btf->types[cand_id];
+	canon_type = d->btf->types[canon_id];
+	cand_kind = BTF_INFO_KIND(cand_type->info);
+	canon_kind = BTF_INFO_KIND(canon_type->info);
+
+	if (cand_type->name_off != canon_type->name_off)
+		return 0;
+
+	/* FWD <--> STRUCT/UNION equivalence check, if enabled */
+	if (!d->opts.dont_resolve_fwds
+	    && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+	    && cand_kind != canon_kind) {
+		__u16 real_kind;
+		__u16 fwd_kind;
+
+		if (cand_kind == BTF_KIND_FWD) {
+			real_kind = canon_kind;
+			fwd_kind = btf_fwd_kind(cand_type);
+		} else {
+			real_kind = cand_kind;
+			fwd_kind = btf_fwd_kind(canon_type);
+		}
+		return fwd_kind == real_kind;
+	}
+
+	if (cand_type->info != canon_type->info)
+		return 0;
+
+	switch (cand_kind) {
+	case BTF_KIND_INT:
+		return btf_equal_int(cand_type, canon_type);
+
+	case BTF_KIND_ENUM:
+		return btf_equal_enum(cand_type, canon_type);
+
+	case BTF_KIND_FWD:
+		return btf_equal_common(cand_type, canon_type);
+
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *cand_arr, *canon_arr;
+
+		if (!btf_compat_array(cand_type, canon_type))
+			return 0;
+		cand_arr = (struct btf_array *)(cand_type + 1);
+		canon_arr = (struct btf_array *)(canon_type + 1);
+		eq = btf_dedup_is_equiv(d,
+			cand_arr->index_type, canon_arr->index_type);
+		if (eq <= 0)
+			return eq;
+		return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
+	}
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *cand_m, *canon_m;
+		__u16 vlen;
+
+		if (!btf_equal_struct(cand_type, canon_type))
+			return 0;
+		vlen = BTF_INFO_VLEN(cand_type->info);
+		cand_m = (struct btf_member *)(cand_type + 1);
+		canon_m = (struct btf_member *)(canon_type + 1);
+		for (i = 0; i < vlen; i++) {
+			eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
+			if (eq <= 0)
+				return eq;
+			cand_m++;
+			canon_m++;
+		}
+
+		return 1;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *cand_p, *canon_p;
+		__u16 vlen;
+
+		if (!btf_compat_fnproto(cand_type, canon_type))
+			return 0;
+		eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+		if (eq <= 0)
+			return eq;
+		vlen = BTF_INFO_VLEN(cand_type->info);
+		cand_p = (struct btf_param *)(cand_type + 1);
+		canon_p = (struct btf_param *)(canon_type + 1);
+		for (i = 0; i < vlen; i++) {
+			eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
+			if (eq <= 0)
+				return eq;
+			cand_p++;
+			canon_p++;
+		}
+		return 1;
+	}
+
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Use hypothetical mapping, produced by successful type graph equivalence
+ * check, to augment existing struct/union canonical mapping, where possible.
+ *
+ * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
+ * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
+ * it doesn't matter if FWD type was part of canonical graph or candidate one,
+ * we are recording the mapping anyway. As opposed to carefulness required
+ * for struct/union correspondence mapping (described below), for FWD resolution
+ * it's not important, as by the time that FWD type (reference type) will be
+ * deduplicated all structs/unions will be deduped already anyway.
+ *
+ * Recording STRUCT/UNION mapping is purely a performance optimization and is
+ * not required for correctness. It needs to be done carefully to ensure that
+ * struct/union from candidate's type graph is not mapped into corresponding
+ * struct/union from canonical type graph that itself hasn't been resolved into
+ * canonical representative. The only guarantee we have is that canonical
+ * struct/union was determined as canonical and that won't change. But any
+ * types referenced through that struct/union fields could have been not yet
+ * resolved, so in case like that it's too early to establish any kind of
+ * correspondence between structs/unions.
+ *
+ * No canonical correspondence is derived for primitive types (they are already
+ * deduplicated completely already anyway) or reference types (they rely on
+ * stability of struct/union canonical relationship for equivalence checks).
+ */
+static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
+{
+	__u32 cand_type_id, targ_type_id;
+	__u16 t_kind, c_kind;
+	__u32 t_id, c_id;
+	int i;
+
+	for (i = 0; i < d->hypot_cnt; i++) {
+		cand_type_id = d->hypot_list[i];
+		targ_type_id = d->hypot_map[cand_type_id];
+		t_id = resolve_type_id(d, targ_type_id);
+		c_id = resolve_type_id(d, cand_type_id);
+		t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info);
+		c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info);
+		/*
+		 * Resolve FWD into STRUCT/UNION.
+		 * It's ok to resolve FWD into STRUCT/UNION that's not yet
+		 * mapped to canonical representative (as opposed to
+		 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
+		 * eventually that struct is going to be mapped and all resolved
+		 * FWDs will automatically resolve to correct canonical
+		 * representative. This will happen before ref type deduping,
+		 * which critically depends on stability of these mapping. This
+		 * stability is not a requirement for STRUCT/UNION equivalence
+		 * checks, though.
+		 */
+		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
+			d->map[c_id] = t_id;
+		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+			d->map[t_id] = c_id;
+
+		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
+		    c_kind != BTF_KIND_FWD &&
+		    is_type_mapped(d, c_id) &&
+		    !is_type_mapped(d, t_id)) {
+			/*
+			 * as a perf optimization, we can map struct/union
+			 * that's part of type graph we just verified for
+			 * equivalence. We can do that for struct/union that has
+			 * canonical representative only, though.
+			 */
+			d->map[t_id] = c_id;
+		}
+	}
+}
+
+/*
+ * Deduplicate struct/union types.
+ *
+ * For each struct/union type its type signature hash is calculated, taking
+ * into account type's name, size, number, order and names of fields, but
+ * ignoring type ID's referenced from fields, because they might not be deduped
+ * completely until after reference types deduplication phase. This type hash
+ * is used to iterate over all potential canonical types, sharing same hash.
+ * For each canonical candidate we check whether type graphs that they form
+ * (through referenced types in fields and so on) are equivalent using algorithm
+ * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
+ * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
+ * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
+ * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
+ * potentially map other structs/unions to their canonical representatives,
+ * if such relationship hasn't yet been established. This speeds up algorithm
+ * by eliminating some of the duplicate work.
+ *
+ * If no matching canonical representative was found, struct/union is marked
+ * as canonical for itself and is added into btf_dedup->dedup_table hash map
+ * for further look ups.
+ */
+static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_dedup_node *cand_node;
+	struct btf_type *t;
+	/* if we don't find equivalent type, then we are canonical */
+	__u32 new_id = type_id;
+	__u16 kind;
+	__u32 h;
+
+	/* already deduped or is in process of deduping (loop detected) */
+	if (d->map[type_id] <= BTF_MAX_TYPE)
+		return 0;
+
+	t = d->btf->types[type_id];
+	kind = BTF_INFO_KIND(t->info);
+
+	if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+		return 0;
+
+	h = btf_hash_struct(t);
+	for_each_hash_node(d->dedup_table, h, cand_node) {
+		int eq;
+
+		btf_dedup_clear_hypot_map(d);
+		eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
+		if (eq < 0)
+			return eq;
+		if (!eq)
+			continue;
+		new_id = cand_node->type_id;
+		btf_dedup_merge_hypot_map(d);
+		break;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btf_dedup_struct_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_struct_type(d, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Deduplicate reference type.
+ *
+ * Once all primitive and struct/union types got deduplicated, we can easily
+ * deduplicate all other (reference) BTF types. This is done in two steps:
+ *
+ * 1. Resolve all referenced type IDs into their canonical type IDs. This
+ * resolution can be done either immediately for primitive or struct/union types
+ * (because they were deduped in previous two phases) or recursively for
+ * reference types. Recursion will always terminate at either primitive or
+ * struct/union type, at which point we can "unwind" chain of reference types
+ * one by one. There is no danger of encountering cycles because in C type
+ * system the only way to form type cycle is through struct/union, so any chain
+ * of reference types, even those taking part in a type cycle, will inevitably
+ * reach struct/union at some point.
+ *
+ * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
+ * becomes "stable", in the sense that no further deduplication will cause
+ * any changes to it. With that, it's now possible to calculate type's signature
+ * hash (this time taking into account referenced type IDs) and loop over all
+ * potential canonical representatives. If no match was found, current type
+ * will become canonical representative of itself and will be added into
+ * btf_dedup->dedup_table as another possible canonical representative.
+ */
+static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_dedup_node *cand_node;
+	struct btf_type *t, *cand;
+	/* if we don't find equivalent type, then we are representative type */
+	__u32 new_id = type_id;
+	__u32 h, ref_type_id;
+
+	if (d->map[type_id] == BTF_IN_PROGRESS_ID)
+		return -ELOOP;
+	if (d->map[type_id] <= BTF_MAX_TYPE)
+		return resolve_type_id(d, type_id);
+
+	t = d->btf->types[type_id];
+	d->map[type_id] = BTF_IN_PROGRESS_ID;
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		ref_type_id = btf_dedup_ref_type(d, t->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		t->type = ref_type_id;
+
+		h = btf_hash_common(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_common(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *info = (struct btf_array *)(t + 1);
+
+		ref_type_id = btf_dedup_ref_type(d, info->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		info->type = ref_type_id;
+
+		ref_type_id = btf_dedup_ref_type(d, info->index_type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		info->index_type = ref_type_id;
+
+		h = btf_hash_array(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_array(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *param;
+		__u16 vlen;
+		int i;
+
+		ref_type_id = btf_dedup_ref_type(d, t->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		t->type = ref_type_id;
+
+		vlen = BTF_INFO_VLEN(t->info);
+		param = (struct btf_param *)(t + 1);
+		for (i = 0; i < vlen; i++) {
+			ref_type_id = btf_dedup_ref_type(d, param->type);
+			if (ref_type_id < 0)
+				return ref_type_id;
+			param->type = ref_type_id;
+			param++;
+		}
+
+		h = btf_hash_fnproto(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_fnproto(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return new_id;
+}
+
+static int btf_dedup_ref_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_ref_type(d, i);
+		if (err < 0)
+			return err;
+	}
+	btf_dedup_table_free(d);
+	return 0;
+}
+
+/*
+ * Compact types.
+ *
+ * After we established for each type its corresponding canonical representative
+ * type, we now can eliminate types that are not canonical and leave only
+ * canonical ones layed out sequentially in memory by copying them over
+ * duplicates. During compaction btf_dedup->hypot_map array is reused to store
+ * a map from original type ID to a new compacted type ID, which will be used
+ * during next phase to "fix up" type IDs, referenced from struct/union and
+ * reference types.
+ */
+static int btf_dedup_compact_types(struct btf_dedup *d)
+{
+	struct btf_type **new_types;
+	__u32 next_type_id = 1;
+	char *types_start, *p;
+	int i, len;
+
+	/* we are going to reuse hypot_map to store compaction remapping */
+	d->hypot_map[0] = 0;
+	for (i = 1; i <= d->btf->nr_types; i++)
+		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+	types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
+	p = types_start;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		if (d->map[i] != i)
+			continue;
+
+		len = btf_type_size(d->btf->types[i], NULL);
+		if (len < 0)
+			return len;
+
+		memmove(p, d->btf->types[i], len);
+		d->hypot_map[i] = next_type_id;
+		d->btf->types[next_type_id] = (struct btf_type *)p;
+		p += len;
+		next_type_id++;
+	}
+
+	/* shrink struct btf's internal types index and update btf_header */
+	d->btf->nr_types = next_type_id - 1;
+	d->btf->types_size = d->btf->nr_types;
+	d->btf->hdr->type_len = p - types_start;
+	new_types = realloc(d->btf->types,
+			    (1 + d->btf->nr_types) * sizeof(struct btf_type *));
+	if (!new_types)
+		return -ENOMEM;
+	d->btf->types = new_types;
+
+	/* make sure string section follows type information without gaps */
+	d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
+	memmove(p, d->btf->strings, d->btf->hdr->str_len);
+	d->btf->strings = p;
+	p += d->btf->hdr->str_len;
+
+	d->btf->data_size = p - (char *)d->btf->data;
+	return 0;
+}
+
+/*
+ * Figure out final (deduplicated and compacted) type ID for provided original
+ * `type_id` by first resolving it into corresponding canonical type ID and
+ * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
+ * which is populated during compaction phase.
+ */
+static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+{
+	__u32 resolved_type_id, new_type_id;
+
+	resolved_type_id = resolve_type_id(d, type_id);
+	new_type_id = d->hypot_map[resolved_type_id];
+	if (new_type_id > BTF_MAX_TYPE)
+		return -EINVAL;
+	return new_type_id;
+}
+
+/*
+ * Remap referenced type IDs into deduped type IDs.
+ *
+ * After BTF types are deduplicated and compacted, their final type IDs may
+ * differ from original ones. The map from original to a corresponding
+ * deduped type ID is stored in btf_dedup->hypot_map and is populated during
+ * compaction phase. During remapping phase we are rewriting all type IDs
+ * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
+ * their final deduped type IDs.
+ */
+static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = d->btf->types[type_id];
+	int i, r;
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		break;
+
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		r = btf_dedup_remap_type_id(d, t->type);
+		if (r < 0)
+			return r;
+		t->type = r;
+		break;
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *arr_info = (struct btf_array *)(t + 1);
+
+		r = btf_dedup_remap_type_id(d, arr_info->type);
+		if (r < 0)
+			return r;
+		arr_info->type = r;
+		r = btf_dedup_remap_type_id(d, arr_info->index_type);
+		if (r < 0)
+			return r;
+		arr_info->index_type = r;
+		break;
+	}
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *member = (struct btf_member *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+
+		for (i = 0; i < vlen; i++) {
+			r = btf_dedup_remap_type_id(d, member->type);
+			if (r < 0)
+				return r;
+			member->type = r;
+			member++;
+		}
+		break;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *param = (struct btf_param *)(t + 1);
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+
+		r = btf_dedup_remap_type_id(d, t->type);
+		if (r < 0)
+			return r;
+		t->type = r;
+
+		for (i = 0; i < vlen; i++) {
+			r = btf_dedup_remap_type_id(d, param->type);
+			if (r < 0)
+				return r;
+			param->type = r;
+			param++;
+		}
+		break;
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_dedup_remap_types(struct btf_dedup *d)
+{
+	int i, r;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		r = btf_dedup_remap_type(d, i);
+		if (r < 0)
+			return r;
+	}
+	return 0;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b0610dcdae6b..0c1d739fd4e8 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -83,6 +83,14 @@ int btf_ext__reloc_line_info(const struct btf *btf,
 __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
 __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
+struct btf_dedup_opts {
+	bool dont_resolve_fwds;
+};
+
+LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+			  const struct btf_dedup_opts *opts,
+			  btf_print_fn_t err_log);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 266bc95d0142..42524133779b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -107,6 +107,7 @@ LIBBPF_0.0.1 {
 		bpf_set_link_xdp_fd;
 		bpf_task_fd_query;
 		bpf_verify_program;
+		btf__dedup;
 		btf__fd;
 		btf__find_by_name;
 		btf__free;
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH bpf-next] bpf: add optional memory accounting for maps
From: Y Song @ 2019-01-31  7:15 UTC (permalink / raw)
  To: Martynas Pumputis; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann
In-Reply-To: <20190130140251.23784-1-m@lambda.lt>

On Wed, Jan 30, 2019 at 6:05 AM Martynas Pumputis <m@lambda.lt> wrote:
>
> Previously, memory allocated for a map was not accounted. Therefore,
> this memory could not be taken into consideration by the cgroups
> memory controller.
>
> This patch introduces the "BPF_F_ACCOUNT_MEM" flag which enables
> the memory accounting for a map, and it can be set during
> the map creation ("BPF_MAP_CREATE") in "map_flags".
>
> When enabled, we account only that amount of memory which is charged
> against the "RLIMIT_MEMLOCK" limit.
>
> To validate the change, first we create the memory cgroup "test-map":
>
>     # mkdir /sys/fs/cgroup/memory/test-map
>
> And then we run the following program against the cgroup:
>
>     $ cat test_map.c
>     <..>
>     int main() {
>         usleep(3 * 1000000);
>         assert(bpf_create_map(BPF_MAP_TYPE_HASH, 8, 16, 65536, 0) > 0);
>         usleep(3 * 1000000);
>     }
>     # cgexec -g memory:test-map ./test_map &
>     # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
>     397312
>     258048
>
>     <after 3 sec the map has been created>
>
>     # bpftool map list
>     19: hash  flags 0x0
>         key 8B  value 16B  max_entries 65536  memlock 5771264B
>     # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
>     401408
>     262144
>
> As we can see, the memory allocated for map is not accounted, as
> 397312B + 5771264B > 401408B.
>
> Next, we enabled the accounting and re-run the test:
>
>     $ cat test_map.c
>     <..>
>     int main() {
>         usleep(3 * 1000000);
>         assert(bpf_create_map(BPF_MAP_TYPE_HASH, 8, 16, 65536, BPF_F_ACCOUNT_MEM) > 0);
>         usleep(3 * 1000000);
>     }
>     # cgexec -g memory:test-map ./test_map &
>     # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
>     450560
>     307200
>
>     <after 3 sec the map has been created>
>
>     # bpftool map list
>     20: hash  flags 0x80
>         key 8B  value 16B  max_entries 65536  memlock 5771264B
>     # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
>     6221824
>     6078464
>
> This time, the memory (including kmem) is accounted, as
> 450560B + 5771264B <= 6221824B

The above test result is for cgroup v1.
I also tested the patch for cgroup v2. It works fine too.

>
> Signed-off-by: Martynas Pumputis <m@lambda.lt>
> ---
>  include/linux/bpf.h            |  5 +++--
>  include/uapi/linux/bpf.h       |  2 ++
>  kernel/bpf/arraymap.c          | 14 +++++++++-----
>  kernel/bpf/bpf_lru_list.c      | 11 +++++++++--
>  kernel/bpf/bpf_lru_list.h      |  1 +
>  kernel/bpf/cpumap.c            | 12 +++++++++---
>  kernel/bpf/devmap.c            | 10 ++++++++--
>  kernel/bpf/hashtab.c           | 19 ++++++++++++++-----
>  kernel/bpf/lpm_trie.c          | 19 +++++++++++++------
>  kernel/bpf/queue_stack_maps.c  |  5 +++--
>  kernel/bpf/reuseport_array.c   |  3 ++-
>  kernel/bpf/stackmap.c          | 12 ++++++++----
>  kernel/bpf/syscall.c           | 12 ++++++++----
>  kernel/bpf/xskmap.c            |  9 +++++++--
>  net/core/sock_map.c            | 13 +++++++++----
>  tools/include/uapi/linux/bpf.h |  3 +++
>  16 files changed, 108 insertions(+), 42 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index e734f163bd0b..353a3f4304fe 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -79,7 +79,8 @@ struct bpf_map {
>         u32 btf_value_type_id;
>         struct btf *btf;
>         bool unpriv_array;
> -       /* 55 bytes hole */
> +       bool account_mem;
> +       /* 54 bytes hole */
>
>         /* The 3rd and 4th cacheline with misc members to avoid false sharing
>          * particularly with refcounting.
> @@ -506,7 +507,7 @@ void bpf_map_put(struct bpf_map *map);
>  int bpf_map_precharge_memlock(u32 pages);
>  int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
>  void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
> -void *bpf_map_area_alloc(size_t size, int numa_node);
> +void *bpf_map_area_alloc(size_t size, int numa_node, bool account_mem);
>  void bpf_map_area_free(void *base);
>  void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 91c43884f295..a374ccbaa51b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -278,6 +278,8 @@ enum bpf_attach_type {
>  #define BPF_F_NO_COMMON_LRU    (1U << 1)
>  /* Specify numa node during map creation */
>  #define BPF_F_NUMA_NODE                (1U << 2)
> +/* Enable memory accounting for map */
> +#define BPF_F_ACCOUNT_MEM      (1U << 7)

Could you put the above definition after BPF_F_ZERO_SEED (1U << 6)?
The same for tools.
With this change, you can add my ack:
Acked-by: Yonghong Song <yhs@fb.com>

^ permalink raw reply

* Re: [PATCH btf 0/3] Add BTF types deduplication algorithm
From: Yonghong Song @ 2019-01-31  7:18 UTC (permalink / raw)
  To: Andrii Nakryiko, netdev@vger.kernel.org, Alexei Starovoitov,
	Martin Lau, acme@kernel.org, daniel@iogearbox.net, Kernel Team,
	Edward Cree
In-Reply-To: <20190131065837.3380288-1-andriin@fb.com>


add Edward.

On 1/30/19 10:58 PM, Andrii Nakryiko wrote:
> This patch series adds BTF deduplication algorithm to libbpf. This algorithm
> allows to take BTF type information containing duplicate per-compilation unit
> information and reduce it to equivalent set of BTF types with no duplication without
> loss of information. It also deduplicates strings and removes those strings that
> are not referenced from any BTF type (and line information in .BTF.ext section,
> if any).
> 
> Algorithm also resolves struct/union forward declarations into concrete BTF types
> across multiple compilation units to facilitate better deduplication ratio. If
> undesired, this resolution can be disabled through specifying corresponding options.
> 
> When applied to BTF data emitted by pahole's DWARF->BTF converter, it reduces
> the overall size of .BTF section by about 65x, from about 112MB to 1.75MB, leaving
> only 29247 out of initial 3073497 BTF type descriptors.
> 
> Algorithm with minor differences and preliminary results before FUNC/FUNC_PROTO
> support is also described more verbosely at:
> https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
> 
> Andrii Nakryiko (3):
>    btf: extract BTF type size calculation
>    btf: add BTF types deduplication algorithm
>    selftests/btf: add initial BTF dedup tests
> 
>   tools/lib/bpf/btf.c                    | 1851 +++++++++++++++++++++++-
>   tools/lib/bpf/btf.h                    |   11 +
>   tools/lib/bpf/libbpf.map               |    3 +
>   tools/testing/selftests/bpf/test_btf.c |  535 ++++++-
>   4 files changed, 2333 insertions(+), 67 deletions(-)
> 

^ permalink raw reply

* [PATCH net] l2tp: copy 4 more bytes to linear part if necessary
From: Jacob Wen @ 2019-01-31  7:18 UTC (permalink / raw)
  To: netdev; +Cc: gnault

The size of L2TPv2 header with all optional fields is 14 bytes.
l2tp_udp_recv_core only moves 10 bytes to the linear part of a
skb. This may lead to l2tp_recv_common read data outside of a skb.

This patch make sure that there is at least 14 bytes in the linear
part of a skb to meet the maximum need of l2tp_udp_recv_core and
l2tp_recv_common. The minimum size of both PPP HDLC-like frame and
Ethernet frame is larger than 14 bytes, so we are safe to do so.

Also remove L2TP_HDR_SIZE_NOSEQ, it is unused now.

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Suggested-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jacob Wen <jian.w.wen@oracle.com>
---
 net/l2tp/l2tp_core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index dd5ba0c11ab3..fed6becc5daf 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -83,8 +83,7 @@
 #define L2TP_SLFLAG_S	   0x40000000
 #define L2TP_SL_SEQ_MASK   0x00ffffff
 
-#define L2TP_HDR_SIZE_SEQ		10
-#define L2TP_HDR_SIZE_NOSEQ		6
+#define L2TP_HDR_SIZE_MAX		14
 
 /* Default trace flags */
 #define L2TP_DEFAULT_DEBUG_FLAGS	0
@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 	__skb_pull(skb, sizeof(struct udphdr));
 
 	/* Short packet? */
-	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
 		l2tp_info(tunnel, L2TP_MSG_DATA,
 			  "%s: recv short packet (len=%d)\n",
 			  tunnel->name, skb->len);
-- 
2.17.1


^ permalink raw reply related

* Re: r8169 Driver - Poor Network Performance Since Kernel 4.19
From: David Chang @ 2019-01-31  7:23 UTC (permalink / raw)
  To: Heiner Kallweit; +Cc: Peter Ceiley, Realtek linux nic maintainers, netdev
In-Reply-To: <4d832c16-8830-b746-a818-6026c2e6725c@gmail.com>

Hi Heiner,

On Jan 31, 2019 at 07:35:30 +0100, Heiner Kallweit wrote:
> Hi David, two more things:
> 
> 1. Could you please test a recent linux-next kernel?
> 2. Please get a register dump (ethtool -d <if>) from 4.18 and 4.19
>    and compare them.

I'm sorry that I do not have the issue machine handy. I would ask
our user to do the test. Thanks!

Regards,
David

> 
> Heiner
> 
> 
> On 31.01.2019 07:21, Heiner Kallweit wrote:
> > David, thanks for the link to the bug ticket.
> > I think only a proper bisect can help to find the offending commit.
> > 
> > Heiner
> > 
> > 
> > On 31.01.2019 03:32, David Chang wrote:
> >> Hi,
> >>
> >> We had a similr case here.
> >> - Realtek r8169 receive performance regression in kernel 4.19
> >>   https://bugzilla.suse.com/show_bug.cgi?id=1119649
> >>
> >> kernel: r8169 0000:01:00.0 eth0: RTL8168h/8111h, XID 54100880
> >> The major symptom is there are many rx_missed count.
> >>
> >>
> >> On Jan 30, 2019 at 20:15:45 +0100, Heiner Kallweit wrote:
> >>> Hi Peter,
> >>>
> >>> recently I had somebody where pcie_aspm=off for whatever reason didn't
> >>> do the trick, can you also check with pcie_aspm.policy=performance.
> >>
> >> We will give it a try later.
> >>
> >>> And please check with "ethtool -S <if>" whether the chip statistics
> >>> show a significant number of errors.
> >>>
> >>> If this doesn't help you may have to bisect to find the offending commit.
> >>
> >> We had tried fallback driver to a few previous commits as following,
> >> but with no luck.
> >>
> >> 9675931e6b65 r8169: re-enable MSI-X on RTL8168g (v4.19)
> >> 098b01ad9837 r8169: don't include asm headers directly (v4.19-rc1)
> >> a2965f12fde6 r8169: remove rtl8169_set_speed_xmii (v4.19-rc1)
> >> 6fcf9b1d4d6c r8169: fix runtime suspend (v4.19-rc1)
> >> e397286b8e89 r8169: remove TBI 1000BaseX support (v4.19-rc1)
> >>
> >> Thanks,
> >> David Chang
> >>
> >>>
> >>> Heiner
> >>>
> >>>
> >>> On 30.01.2019 10:59, Peter Ceiley wrote:
> >>>> Hi Heiner,
> >>>>
> >>>> I tried disabling the ASPM using the pcie_aspm=off kernel parameter
> >>>> and this made no difference.
> >>>>
> >>>> I tried compiling the 4.18.16 r8169.c with the 4.19.18 source and
> >>>> subsequently loaded the module in the running 4.19.18 kernel. I can
> >>>> confirm that this immediately resolved the issue and access to the NFS
> >>>> shares operated as expected.
> >>>>
> >>>> I presume this means it is an issue with the r8169 driver included in
> >>>> 4.19 onwards?
> >>>>
> >>>> To answer your last questions:
> >>>>
> >>>> Base Board Information
> >>>>     Manufacturer: Alienware
> >>>>     Product Name: 0PGRP5
> >>>>     Version: A02
> >>>>
> >>>> ... and yes, the RTL8168 is the onboard network chip.
> >>>>
> >>>> Regards,
> >>>>
> >>>> Peter.
> >>>>
> >>>> On Tue, 29 Jan 2019 at 17:44, Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>>>>
> >>>>> Hi Peter,
> >>>>>
> >>>>> I think the vendor driver doesn't enable ASPM per default.
> >>>>> So it's worth a try to disable ASPM in the BIOS or via sysfs.
> >>>>> Few older systems seem to have issues with ASPM, what kind of
> >>>>> system / mainboard are you using? The RTL8168 is the onboard
> >>>>> network chip?
> >>>>>
> >>>>> Rgds, Heiner
> >>>>>
> >>>>>
> >>>>> On 29.01.2019 07:20, Peter Ceiley wrote:
> >>>>>> Hi Heiner,
> >>>>>>
> >>>>>> Thanks, I'll do some more testing. It might not be the driver - I
> >>>>>> assumed it was due to the fact that using the r8168 driver 'resolves'
> >>>>>> the issue. I'll see if I can test the r8169.c on top of 4.19 - this is
> >>>>>> a good idea.
> >>>>>>
> >>>>>> Cheers,
> >>>>>>
> >>>>>> Peter.
> >>>>>>
> >>>>>> On Tue, 29 Jan 2019 at 17:16, Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>>>>>>
> >>>>>>> Hi Peter,
> >>>>>>>
> >>>>>>> at a first glance it doesn't look like a typical driver issue.
> >>>>>>> What you could do:
> >>>>>>>
> >>>>>>> - Test the r8169.c from 4.18 on top of 4.19.
> >>>>>>>
> >>>>>>> - Check whether disabling ASPM (/sys/module/pcie_aspm) has an effect.
> >>>>>>>
> >>>>>>> - Bisect between 4.18 and 4.19 to find the offending commit.
> >>>>>>>
> >>>>>>> Any specific reason why you think root cause is in the driver and not
> >>>>>>> elsewhere in the network subsystem?
> >>>>>>>
> >>>>>>> Heiner
> >>>>>>>
> >>>>>>>
> >>>>>>> On 28.01.2019 23:10, Peter Ceiley wrote:
> >>>>>>>> Hi Heiner,
> >>>>>>>>
> >>>>>>>> Thanks for getting back to me.
> >>>>>>>>
> >>>>>>>> No, I don't use jumbo packets.
> >>>>>>>>
> >>>>>>>> Bandwidth is *generally* good, and iperf results to my NAS provide
> >>>>>>>> over 900 Mbits/s in both circumstances. The issue seems to appear when
> >>>>>>>> establishing a connection and is most notable, for example, on my
> >>>>>>>> mounted NFS shares where it takes seconds (up to 10's of seconds on
> >>>>>>>> larger directories) to list the contents of each directory. Once a
> >>>>>>>> transfer begins on a file, I appear to get good bandwidth.
> >>>>>>>>
> >>>>>>>> I'm unsure of the best scientific data to provide you in order to
> >>>>>>>> troubleshoot this issue. Running the following
> >>>>>>>>
> >>>>>>>>     netstat -s |grep retransmitted
> >>>>>>>>
> >>>>>>>> shows a steady increase in retransmitted segments each time I list the
> >>>>>>>> contents of a remote directory, for example, running 'ls' on a
> >>>>>>>> directory containing 345 media files did the following using kernel
> >>>>>>>> 4.19.18:
> >>>>>>>>
> >>>>>>>> increased retransmitted segments by 21 and the 'time' command showed
> >>>>>>>> the following:
> >>>>>>>>     real    0m19.867s
> >>>>>>>>     user    0m0.012s
> >>>>>>>>     sys    0m0.036s
> >>>>>>>>
> >>>>>>>> The same command shows no retransmitted segments running kernel
> >>>>>>>> 4.18.16 and 'time' showed:
> >>>>>>>>     real    0m0.300s
> >>>>>>>>     user    0m0.004s
> >>>>>>>>     sys    0m0.007s
> >>>>>>>>
> >>>>>>>> ifconfig does not show any RX/TX errors nor dropped packets in either case.
> >>>>>>>>
> >>>>>>>> dmesg XID:
> >>>>>>>> [    2.979984] r8169 0000:03:00.0 eth0: RTL8168g/8111g,
> >>>>>>>> f8:b1:56:fe:67:e0, XID 4c000800, IRQ 32
> >>>>>>>>
> >>>>>>>> # lspci -vv
> >>>>>>>> 03:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd.
> >>>>>>>> RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller (rev 0c)
> >>>>>>>>     Subsystem: Dell RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller
> >>>>>>>>     Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
> >>>>>>>> ParErr- Stepping- SERR- FastB2B- DisINTx+
> >>>>>>>>     Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
> >>>>>>>> <TAbort- <MAbort- >SERR- <PERR- INTx-
> >>>>>>>>     Latency: 0, Cache Line Size: 64 bytes
> >>>>>>>>     Interrupt: pin A routed to IRQ 19
> >>>>>>>>     Region 0: I/O ports at d000 [size=256]
> >>>>>>>>     Region 2: Memory at f7b00000 (64-bit, non-prefetchable) [size=4K]
> >>>>>>>>     Region 4: Memory at f2100000 (64-bit, prefetchable) [size=16K]
> >>>>>>>>     Capabilities: [40] Power Management version 3
> >>>>>>>>         Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA
> >>>>>>>> PME(D0+,D1+,D2+,D3hot+,D3cold+)
> >>>>>>>>         Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
> >>>>>>>>     Capabilities: [50] MSI: Enable- Count=1/1 Maskable- 64bit+
> >>>>>>>>         Address: 0000000000000000  Data: 0000
> >>>>>>>>     Capabilities: [70] Express (v2) Endpoint, MSI 01
> >>>>>>>>         DevCap:    MaxPayload 128 bytes, PhantFunc 0, Latency L0s
> >>>>>>>> <512ns, L1 <64us
> >>>>>>>>             ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset-
> >>>>>>>> SlotPowerLimit 10.000W
> >>>>>>>>         DevCtl:    CorrErr- NonFatalErr- FatalErr- UnsupReq-
> >>>>>>>>             RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
> >>>>>>>>             MaxPayload 128 bytes, MaxReadReq 4096 bytes
> >>>>>>>>         DevSta:    CorrErr+ NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend-
> >>>>>>>>         LnkCap:    Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit
> >>>>>>>> Latency L0s unlimited, L1 <64us
> >>>>>>>>             ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
> >>>>>>>>         LnkCtl:    ASPM L1 Enabled; RCB 64 bytes Disabled- CommClk+
> >>>>>>>>             ExtSynch- ClockPM+ AutWidDis- BWInt- AutBWInt-
> >>>>>>>>         LnkSta:    Speed 2.5GT/s (ok), Width x1 (ok)
> >>>>>>>>             TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
> >>>>>>>>         DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+,
> >>>>>>>> OBFF Via message/WAKE#
> >>>>>>>>              AtomicOpsCap: 32bit- 64bit- 128bitCAS-
> >>>>>>>>         DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR+,
> >>>>>>>> OBFF Disabled
> >>>>>>>>              AtomicOpsCtl: ReqEn-
> >>>>>>>>         LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-
> >>>>>>>>              Transmit Margin: Normal Operating Range,
> >>>>>>>> EnterModifiedCompliance- ComplianceSOS-
> >>>>>>>>              Compliance De-emphasis: -6dB
> >>>>>>>>         LnkSta2: Current De-emphasis Level: -6dB,
> >>>>>>>> EqualizationComplete-, EqualizationPhase1-
> >>>>>>>>              EqualizationPhase2-, EqualizationPhase3-, LinkEqualizationRequest-
> >>>>>>>>     Capabilities: [b0] MSI-X: Enable+ Count=4 Masked-
> >>>>>>>>         Vector table: BAR=4 offset=00000000
> >>>>>>>>         PBA: BAR=4 offset=00000800
> >>>>>>>>     Capabilities: [d0] Vital Product Data
> >>>>>>>> pcilib: sysfs_read_vpd: read failed: Input/output error
> >>>>>>>>         Not readable
> >>>>>>>>     Capabilities: [100 v1] Advanced Error Reporting
> >>>>>>>>         UESta:    DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt-
> >>>>>>>> RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
> >>>>>>>>         UEMsk:    DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt-
> >>>>>>>> RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
> >>>>>>>>         UESvrt:    DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt-
> >>>>>>>> RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol-
> >>>>>>>>         CESta:    RxErr+ BadTLP+ BadDLLP+ Rollover- Timeout+ AdvNonFatalErr-
> >>>>>>>>         CEMsk:    RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+
> >>>>>>>>         AERCap:    First Error Pointer: 00, ECRCGenCap+ ECRCGenEn-
> >>>>>>>> ECRCChkCap+ ECRCChkEn-
> >>>>>>>>             MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
> >>>>>>>>         HeaderLog: 00000000 00000000 00000000 00000000
> >>>>>>>>     Capabilities: [140 v1] Virtual Channel
> >>>>>>>>         Caps:    LPEVC=0 RefClk=100ns PATEntryBits=1
> >>>>>>>>         Arb:    Fixed- WRR32- WRR64- WRR128-
> >>>>>>>>         Ctrl:    ArbSelect=Fixed
> >>>>>>>>         Status:    InProgress-
> >>>>>>>>         VC0:    Caps:    PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
> >>>>>>>>             Arb:    Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256-
> >>>>>>>>             Ctrl:    Enable+ ID=0 ArbSelect=Fixed TC/VC=01
> >>>>>>>>             Status:    NegoPending- InProgress-
> >>>>>>>>     Capabilities: [160 v1] Device Serial Number 01-00-00-00-68-4c-e0-00
> >>>>>>>>     Capabilities: [170 v1] Latency Tolerance Reporting
> >>>>>>>>         Max snoop latency: 71680ns
> >>>>>>>>         Max no snoop latency: 71680ns
> >>>>>>>>     Kernel driver in use: r8169
> >>>>>>>>     Kernel modules: r8169
> >>>>>>>>
> >>>>>>>> Please let me know if you have any other ideas in terms of testing.
> >>>>>>>>
> >>>>>>>> Thanks!
> >>>>>>>>
> >>>>>>>> Peter.
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> On Tue, 29 Jan 2019 at 05:28, Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>>>>>>>>
> >>>>>>>>> On 28.01.2019 12:13, Peter Ceiley wrote:
> >>>>>>>>>> Hi,
> >>>>>>>>>>
> >>>>>>>>>> I have been experiencing very poor network performance since Kernel
> >>>>>>>>>> 4.19 and I'm confident it's related to the r8169 driver.
> >>>>>>>>>>
> >>>>>>>>>> I have no issue with kernel versions 4.18 and prior. I am experiencing
> >>>>>>>>>> this issue in kernels 4.19 and 4.20 (currently running/testing with
> >>>>>>>>>> 4.20.4 & 4.19.18).
> >>>>>>>>>>
> >>>>>>>>>> If someone could guide me in the right direction, I'm happy to help
> >>>>>>>>>> troubleshoot this issue. Note that I have been keeping an eye on one
> >>>>>>>>>> issue related to loading of the PHY driver, however, my symptoms
> >>>>>>>>>> differ in that I still have a network connection. I have attempted to
> >>>>>>>>>> reload the driver on a running system, but this does not improve the
> >>>>>>>>>> situation.
> >>>>>>>>>>
> >>>>>>>>>> Using the proprietary r8168 driver returns my device to proper working order.
> >>>>>>>>>>
> >>>>>>>>>> lshw shows:
> >>>>>>>>>>        description: Ethernet interface
> >>>>>>>>>>        product: RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller
> >>>>>>>>>>        vendor: Realtek Semiconductor Co., Ltd.
> >>>>>>>>>>        physical id: 0
> >>>>>>>>>>        bus info: pci@0000:03:00.0
> >>>>>>>>>>        logical name: enp3s0
> >>>>>>>>>>        version: 0c
> >>>>>>>>>>        serial:
> >>>>>>>>>>        size: 1Gbit/s
> >>>>>>>>>>        capacity: 1Gbit/s
> >>>>>>>>>>        width: 64 bits
> >>>>>>>>>>        clock: 33MHz
> >>>>>>>>>>        capabilities: pm msi pciexpress msix vpd bus_master cap_list
> >>>>>>>>>> ethernet physical tp aui bnc mii fibre 10bt 10bt-fd 100bt 100bt-fd
> >>>>>>>>>> 1000bt-fd autonegotiation
> >>>>>>>>>>        configuration: autonegotiation=on broadcast=yes driver=r8169
> >>>>>>>>>> duplex=full firmware=rtl8168g-2_0.0.1 02/06/13 ip=192.168.1.25
> >>>>>>>>>> latency=0 link=yes multicast=yes port=MII speed=1Gbit/s
> >>>>>>>>>>        resources: irq:19 ioport:d000(size=256)
> >>>>>>>>>> memory:f7b00000-f7b00fff memory:f2100000-f2103fff
> >>>>>>>>>>
> >>>>>>>>>> Kind Regards,
> >>>>>>>>>>
> >>>>>>>>>> Peter.
> >>>>>>>>>>
> >>>>>>>>> Hi Peter,
> >>>>>>>>>
> >>>>>>>>> the description "poor network performance" is quite vague, therefore:
> >>>>>>>>>
> >>>>>>>>> - Can you provide any measurements?
> >>>>>>>>> - iperf results before and after
> >>>>>>>>> - statistics about dropped packets (rx and/or tx)
> >>>>>>>>> - Do you use jumbo packets?
> >>>>>>>>>
> >>>>>>>>> Also help would be a "lspci -vv" output for the network card and
> >>>>>>>>> the dmesg output line with the chip XID.
> >>>>>>>>>
> >>>>>>>>> Heiner
> >>>>>>>>
> >>>>>>>
> >>>>>>
> >>>>>
> >>>>
> >>>
> >>
> > 
> 
> 

^ permalink raw reply

* Re: [PATCH] net: esp4: Fix double free on esp4 functions
From: Steffen Klassert @ 2019-01-31  7:27 UTC (permalink / raw)
  To: Ramin Farajpour Cami; +Cc: Eric Dumazet, davem, Herbert Xu, netdev
In-Reply-To: <CAKEzZ8zyhvJ1Fr6UDOxVqBnUz73xPSLwfnoM4FFGf6wHSTU=Jg@mail.gmail.com>

On Thu, Jan 31, 2019 at 06:32:07AM +0000, Ramin Farajpour Cami wrote:
> Hi Eric,
> 
> I going to for avoid double free of resource identifiers we should set
> variables initialized in "tmp/key" to NULL if an error occurred int the
> "esp_init_authenc()" and "esp_output_tail()" attempts to free the memory
> again.my means like this patch c7055fd15ff46d92eb0dd1c16a4fe010d58224c8
> <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c7055fd15ff46d92eb0dd1c16a4fe010d58224c8>

There is a big difference between the above-noted patch and
your patch. You really have to understand the code you are
trying to change.


^ permalink raw reply

* Re: [PATCH] can: flexcan: fix timeout when set small bitrate
From: Marc Kleine-Budde @ 2019-01-31  7:34 UTC (permalink / raw)
  To: Joakim Zhang, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, dl-linux-imx, Aisheng Dong
In-Reply-To: <20190131065619.7298-1-qiangqing.zhang@nxp.com>


[-- Attachment #1.1: Type: text/plain, Size: 2059 bytes --]

On 1/31/19 7:58 AM, Joakim Zhang wrote:
> From: Dong Aisheng <aisheng.dong@nxp.com>
> 
> Current we can meet timeout issue when setting a small bitrate
> like 10000 as follows:
> root@imx6qdlsolo:~# ip link set can0 up type can bitrate 10000
> A link change request failed with some changes committed already.
> Interface can0 may have been left with an inconsistent configuration,
> please check.
> RTNETLINK answers: Connection timed out

Which SoC are you using? Which clock rate has the flexcan IP core?

> It is caused by calling of flexcan_chip_unfreeze() timeout.
> 
> Originally the code is using usleep_range(10, 20) for unfreeze operation,
> but the patch (8badd65 can: flexcan: avoid calling usleep_range from
> interrupt context) changed it into udelay(10) which is only a half delay
> of before, there're also some other delay changes.
> 
> After only changed unfreeze delay back to udelay(20), the issue is gone.
> So other timeout values are kept the same as 8badd65 changed.

Can you change FLEXCAN_TIMEOUT_US instead?

> Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> ---
>  drivers/net/can/flexcan.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> index 2bca867bcfaa..1d3a9053bbeb 100644
> --- a/drivers/net/can/flexcan.c
> +++ b/drivers/net/can/flexcan.c
> @@ -530,7 +530,7 @@ static int flexcan_chip_unfreeze(struct flexcan_priv *priv)
>  	priv->write(reg, &regs->mcr);
>  
>  	while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
> -		udelay(10);
> +		udelay(20);
>  
>  	if (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK)
>  		return -ETIMEDOUT;
> 

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH net-next v3 1/8] devlink: add device information API
From: Jiri Pirko @ 2019-01-31  7:40 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, oss-drivers, andrew, f.fainelli, mkubecek, eugenem,
	jonathan.lemon
In-Reply-To: <20190130234133.4298-2-jakub.kicinski@netronome.com>

Thu, Jan 31, 2019 at 12:41:26AM CET, jakub.kicinski@netronome.com wrote:
>ethtool -i has served us well for a long time, but its showing
>its limitations more and more. The device information should
>also be reported per device not per-netdev.
>
>Lay foundation for a simple devlink-based way of reading device
>info. Add driver name and device serial number as initial pieces
>of information exposed via this new API.
>
>v3:
> - rename helpers (Jiri);
> - rename driver name attr (Jiri);
> - remove double spacing in commit message (Jiri).
>RFC v2:
> - wrap the skb into an opaque structure (Jiri);
> - allow the serial number of be any length (Jiri & Andrew);
> - add driver name (Jonathan).
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH net-next v2 01/12] net: bridge: multicast: Propagate br_mc_disabled_update() return
From: Ido Schimmel @ 2019-01-31  7:50 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev@vger.kernel.org, andrew@lunn.ch, vivien.didelot@gmail.com,
	davem@davemloft.net, Jiri Pirko, ilias.apalodimas@linaro.org,
	ivan.khoronzhuk@linaro.org, roopa@cumulusnetworks.com,
	nikolay@cumulusnetworks.com, Petr Machata
In-Reply-To: <63b446c5-8104-d9f9-3924-aaa481ee3b8c@gmail.com>

On Wed, Jan 30, 2019 at 05:00:57PM -0800, Florian Fainelli wrote:
> On 1/29/19 11:36 PM, Ido Schimmel wrote:
> > On Tue, Jan 29, 2019 at 04:55:37PM -0800, Florian Fainelli wrote:
> >> -static void br_mc_disabled_update(struct net_device *dev, bool value)
> >> +static int br_mc_disabled_update(struct net_device *dev, bool value)
> >>  {
> >>  	struct switchdev_attr attr = {
> >>  		.orig_dev = dev,
> >>  		.id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
> >> -		.flags = SWITCHDEV_F_DEFER,
> >> +		.flags = SWITCHDEV_F_DEFER | SWITCHDEV_F_SKIP_EOPNOTSUPP,
> > 
> > Actually, since the operation is deferred I don't think the return value
> > from the driver is ever checked. Can you test it?
> 
> You are right, you get a WARN() from switchdev_attr_port_set_now(), but
> this does not propagate back to the caller, so you can still create a
> bridge device and enslave a device successfully despite getting warnings
> on the console.
> 
> > 
> > I think it would be good to convert the attributes to use the switchdev
> > notifier like commit d17d9f5e5143 ("switchdev: Replace port obj add/del
> > SDO with a notification") did for objects. Then you can have your
> > listener veto the operation in the same context it is happening.
> 
> Alright, working on it. Would you do that just for the attr_set, or for
> attr_get as well (to be symmetrical)?

Yes, then we can get rid of switchdev_ops completely.

^ permalink raw reply

* Re: [PATCH net-next v3 2/8] devlink: add version reporting to devlink info API
From: Jiri Pirko @ 2019-01-31  7:57 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, oss-drivers, andrew, f.fainelli, mkubecek, eugenem,
	jonathan.lemon
In-Reply-To: <20190130234133.4298-3-jakub.kicinski@netronome.com>

Thu, Jan 31, 2019 at 12:41:27AM CET, jakub.kicinski@netronome.com wrote:
>ethtool -i has a few fixed-size fields which can be used to report
>firmware version and expansion ROM version. Unfortunately, modern
>hardware has more firmware components. There is usually some
>datapath microcode, management controller, PXE drivers, and a
>CPLD load. Running ethtool -i on modern controllers reveals the
>fact that vendors cram multiple values into firmware version field.
>
>Here are some examples from systems I could lay my hands on quickly:
>
>tg3:  "FFV20.2.17 bc 5720-v1.39"
>i40e: "6.01 0x800034a4 1.1747.0"
>nfp:  "0.0.3.5 0.25 sriov-2.1.16 nic"
>
>Add a new devlink API to allow retrieving multiple versions, and
>provide user-readable name for those versions.
>
>While at it break down the versions into three categories:
> - fixed - this is the board/fixed component version, usually vendors
>           report information like the board version in the PCI VPD,
>           but it will benefit from naming and common API as well;
> - running - this is the running firmware version;
> - stored - this is firmware in the flash, after firmware update
>            this value will reflect the flashed version, while the
>            running version may only be updated after reboot.
>
>v3:
> - add per-type helpers instead of using the special argument (Jiri).
>RFCv2:
> - remove the nesting in attr DEVLINK_ATTR_INFO_VERSIONS (now
>   versions are mixed with other info attrs)l
> - have the driver report versions from the same callback as
>   other info.
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH net-next v3 3/8] devlink: add generic info version names
From: Jiri Pirko @ 2019-01-31  7:58 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, oss-drivers, andrew, f.fainelli, mkubecek, eugenem,
	jonathan.lemon
In-Reply-To: <20190130234133.4298-4-jakub.kicinski@netronome.com>

Thu, Jan 31, 2019 at 12:41:28AM CET, jakub.kicinski@netronome.com wrote:
>Add defines and docs for generic info versions.
>
>v3:
> - add docs;
> - separate patch (Jiri).
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH net-next v3 4/8] nfp: devlink: report driver name and serial number
From: Jiri Pirko @ 2019-01-31  7:59 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, oss-drivers, andrew, f.fainelli, mkubecek, eugenem,
	jonathan.lemon
In-Reply-To: <20190130234133.4298-5-jakub.kicinski@netronome.com>

Thu, Jan 31, 2019 at 12:41:29AM CET, jakub.kicinski@netronome.com wrote:
>Report the basic info through new devlink info API.
>
>RFCv2:
> - add driver name;
> - align serial to core changes.
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: Latitude 5495's tg3 hangs under heavy load
From: Kai-Heng Feng @ 2019-01-31  8:08 UTC (permalink / raw)
  To: Siva Reddy Kallam
  Cc: Prashant Sreedharan, Michael Chan, Linux Netdev List,
	Chih-Hsyuan Ho
In-Reply-To: <7E14D23E-92C4-4D9E-B891-4F08075DF382@canonical.com>

Hi tg3 folks, 

> On Jan 9, 2019, at 13:42, Kai Heng Feng <kai.heng.feng@canonical.com> wrote:
> 
> 
> 
>> On Jan 7, 2019, at 5:12 PM, Siva Reddy Kallam <siva.kallam@broadcom.com> wrote:
>> 
>> On Mon, Jan 7, 2019 at 11:34 AM Kai Heng Feng
>> <kai.heng.feng@canonical.com> wrote:
>>> 
>>> Hi tg3 folks,
>>> 
>>> Any idea how to solve the bug?
>>> 
>>> Kai-Heng
>> Hi,
>> Can you share Register dump(ethtool -d)?
> 
> `ethtool -d` before freeze:
> https://pastebin.com/MSkJzhcv
> 
> `ethtool -d` after freeze:
> https://pastebin.com/dQj8mLsN
> 
>> Is ifconfig down/up bringing back interface?
> 
> Yes. And seems like network works fine afterward.
> 
> `ethtool -d` after down/up:
> https://pastebin.com/vL1gCC2n

Wondering if there’s any update?

Are these info sufficient?

Kai-Heng

> 
> Kai-Heng
> 
>> 
>>> 
>>>> On Dec 7, 2018, at 17:27, Kai Heng Feng <kai.heng.feng@canonical.com> wrote:
>>>> 
>>>> Hi tg3 maintainers,
>>>> 
>>>> I’ve encountered network freeze when using tg3 in gigabits net.
>>>> 
>>>> The issue can be easily reproduced when using scp to transfer files in local network.
>>>> 
>>>> The symptom is pretty similar to what this commit is trying to solve:
>>>> commit 3a498606bb04af603a46ebde8296040b2de350d1
>>>> Author: Sanjeev Bansal <sanjeevb.bansal@broadcom.com>
>>>> Date:   Mon Jul 16 11:13:32 2018 +0530
>>>> 
>>>>  tg3: Add higher cpu clock for 5762.
>>>> 
>>>>  This patch has fix for TX timeout while running bi-directional
>>>>  traffic with 100 Mbps using 5762.
>>>> 
>>>> But reverting this commit doesn’t help.
>>>> 
>>>> Latitude 5495 is a AMD Raven Ridge platform, not sure if this matters.
>>>> 
>>>> Here’s the lspci for this device:
>>>> 03:00.0 Ethernet controller [0200]: Broadcom Inc. and subsidiaries NetXtreme BCM5762 Gigabit Ethernet PCIe [14e4:1687] (rev 10)
>>>>      Subsystem: Dell NetXtreme BCM5762 Gigabit Ethernet PCIe [1028:0814]
>>>>      Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
>>>>      Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
>>>>      Latency: 0
>>>>      Interrupt: pin A routed to IRQ 16
>>>>      Region 0: Memory at e0220000 (64-bit, prefetchable) [size=64K]
>>>>      Region 2: Memory at e0210000 (64-bit, prefetchable) [size=64K]
>>>>      Region 4: Memory at e0200000 (64-bit, prefetchable) [size=64K]
>>>>      Capabilities: [48] Power Management version 3
>>>>              Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
>>>>              Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=1 PME-
>>>>      Capabilities: [50] Vital Product Data
>>>>              Product Name: Broadcom NetXtreme Gigabit Ethernet Controller
>>>>              Read-only fields:
>>>>                      [PN] Part number: BCM95762
>>>>                      [EC] Engineering changes: 106679-15
>>>>                      [SN] Serial number: 0123456789
>>>>                      [MN] Manufacture ID: 31 34 65 34
>>>>                      [RV] Reserved: checksum good, 28 byte(s) reserved
>>>>              Read/write fields:
>>>>                      [YA] Asset tag: XYZ01234567
>>>>                      [RW] Read-write area: 107 byte(s) free
>>>>              End
>>>>      Capabilities: [58] MSI: Enable- Count=1/8 Maskable- 64bit+
>>>>              Address: 0000000000000000  Data: 0000
>>>>      Capabilities: [a0] MSI-X: Enable+ Count=6 Masked-
>>>>              Vector table: BAR=4 offset=00000000
>>>>              PBA: BAR=2 offset=00000120
>>>>      Capabilities: [ac] Express (v2) Endpoint, MSI 00
>>>>              DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 <64us
>>>>                      ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset+ SlotPowerLimit 0.000W
>>>>              DevCtl: Report errors: Correctable- Non-Fatal- Fatal- Unsupported-
>>>>                      RlxdOrd- ExtTag- PhantFunc- AuxPwr+ NoSnoop- FLReset-
>>>>                      MaxPayload 128 bytes, MaxReadReq 4096 bytes
>>>>              DevSta: CorrErr- UncorrErr- FatalErr- UnsuppReq- AuxPwr+ TransPend-
>>>>              LnkCap: Port #0, Speed 5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <2us, L1 <64us
>>>>                      ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
>>>>              LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
>>>>                      ExtSynch- ClockPM+ AutWidDis- BWInt- AutBWInt-
>>>>              LnkSta: Speed 5GT/s, Width x1, TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
>>>>              DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+, OBFF Via WAKE#
>>>>              DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR+, OBFF Disabled
>>>>              LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-
>>>>                       Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
>>>>                       Compliance De-emphasis: -6dB
>>>>              LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete-, EqualizationPhase1-
>>>>                       EqualizationPhase2-, EqualizationPhase3-, LinkEqualizationRequest-
>>>>      Capabilities: [100 v1] Advanced Error Reporting
>>>>              UESta:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
>>>>              UEMsk:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
>>>>              UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol-
>>>>              CESta:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
>>>>              CEMsk:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+
>>>>              AERCap: First Error Pointer: 00, GenCap+ CGenEn- ChkCap+ ChkEn-
>>>>      Capabilities: [13c v1] Device Serial Number 00-00-a4-4c-c8-5b-65-74
>>>>      Capabilities: [150 v1] Power Budgeting <?>
>>>>      Capabilities: [160 v1] Virtual Channel
>>>>              Caps:   LPEVC=0 RefClk=100ns PATEntryBits=1
>>>>              Arb:    Fixed- WRR32- WRR64- WRR128-
>>>>              Ctrl:   ArbSelect=Fixed
>>>>              Status: InProgress-
>>>>              VC0:    Caps:   PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
>>>>                      Arb:    Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256-
>>>>                      Ctrl:   Enable+ ID=0 ArbSelect=Fixed TC/VC=ff
>>>>                      Status: NegoPending- InProgress-
>>>>      Capabilities: [1b0 v1] Latency Tolerance Reporting
>>>>              Max snoop latency: 1048576ns
>>>>              Max no snoop latency: 1048576ns
>>>>      Capabilities: [230 v1] Transaction Processing Hints
>>>>              Interrupt vector mode supported
>>>>              Steering table in MSI-X table
>>>>      Kernel driver in use: tg3
>>>>      Kernel modules: tg3
>>>> 
>>>> Please let me know if you need more information.
>>>> 
>>>> Kai-Heng
>>> 
> 


^ permalink raw reply

* Re: [PATCH net-next v3 5/8] nfp: devlink: report fixed versions
From: Jiri Pirko @ 2019-01-31  8:00 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, oss-drivers, andrew, f.fainelli, mkubecek, eugenem,
	jonathan.lemon
In-Reply-To: <20190130234133.4298-6-jakub.kicinski@netronome.com>

Thu, Jan 31, 2019 at 12:41:30AM CET, jakub.kicinski@netronome.com wrote:
>Report information about the hardware.
>
>RFCv2:
> - add defines for board IDs which are likely to be reusable for
>   other drivers (Jiri).
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH net-next v3 7/8] nfp: devlink: report the running and flashed versions
From: Jiri Pirko @ 2019-01-31  8:03 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, oss-drivers, andrew, f.fainelli, mkubecek, eugenem,
	jonathan.lemon
In-Reply-To: <20190130234133.4298-8-jakub.kicinski@netronome.com>

Thu, Jan 31, 2019 at 12:41:32AM CET, jakub.kicinski@netronome.com wrote:
>Report versions of firmware components using the new NSP command.
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH net] l2tp: copy 4 more bytes to linear part if necessary
From: Guillaume Nault @ 2019-01-31  8:43 UTC (permalink / raw)
  To: Jacob Wen; +Cc: netdev
In-Reply-To: <20190131071856.22120-1-jian.w.wen@oracle.com>

On Thu, Jan 31, 2019 at 03:18:56PM +0800, Jacob Wen wrote:
> The size of L2TPv2 header with all optional fields is 14 bytes.
> l2tp_udp_recv_core only moves 10 bytes to the linear part of a
> skb. This may lead to l2tp_recv_common read data outside of a skb.
> 
Looks good, thanks for taking care of this!

Acked-by: Guillaume Nault <gnault@redhat.com>

^ permalink raw reply

* Re: [PATCH net-next v8 0/8] devlink: Add configuration parameters support for devlink_port
From: Vasundhara Volam @ 2019-01-31  8:44 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: David Miller, michael.chan@broadcom.com, Jiri Pirko,
	Michal Kubecek, Netdev
In-Reply-To: <20190130155758.3a6be144@cakuba.hsd1.ca.comcast.net>

On Thu, Jan 31, 2019 at 5:28 AM Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Mon, 28 Jan 2019 18:00:19 +0530, Vasundhara Volam wrote:
> > This patchset adds support for configuration parameters setting through
> > devlink_port.  Each device registers supported configuration parameters
> > table.
> >
> > The user can retrieve data on these parameters by
> > "devlink port param show" command and can set new value to a
> > parameter by "devlink port param set" command.
> > All configuration modes supported by devlink_dev are supported
> > by devlink_port also.
>
> Hm, I think we were kind of going somewhere with the ethtool/nl
> attribute encapsulation idea.  You seem to have ignored those comments
> on v7 and reposted v8 a day after.
Jakub, I have added the idea of future expansion of WOL in my v8 cover letter
mentioning the same. I will work on this as a future patchset.
>
> I think we should explore the nesting further.  The only obstacle is
> that ethtool netlink conversion is not yet finished, but that's just
> a simple matter of programming.  Do you disagree with that direction?
> Please comment.
No, I agree with you about ethtool netlink encapsulation.

^ permalink raw reply

* RE: [PATCH] can: flexcan: fix timeout when set small bitrate
From: Joakim Zhang @ 2019-01-31  8:48 UTC (permalink / raw)
  To: Marc Kleine-Budde, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, dl-linux-imx, Aisheng Dong
In-Reply-To: <1902c81b-86fa-469e-b495-c37dca744cb9@pengutronix.de>


Hi Marc,

> -----Original Message-----
> From: Marc Kleine-Budde <mkl@pengutronix.de>
> Sent: 2019年1月31日 15:34
> To: Joakim Zhang <qiangqing.zhang@nxp.com>; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Aisheng
> Dong <aisheng.dong@nxp.com>
> Subject: Re: [PATCH] can: flexcan: fix timeout when set small bitrate
> 
> On 1/31/19 7:58 AM, Joakim Zhang wrote:
> > From: Dong Aisheng <aisheng.dong@nxp.com>
> >
> > Current we can meet timeout issue when setting a small bitrate like
> > 10000 as follows:
> > root@imx6qdlsolo:~# ip link set can0 up type can bitrate 10000 A link
> > change request failed with some changes committed already.
> > Interface can0 may have been left with an inconsistent configuration,
> > please check.
> > RTNETLINK answers: Connection timed out
> 
> Which SoC are you using? Which clock rate has the flexcan IP core?

  We tested on i.MX6 series boards and all met this issue. And ipg clock rate is 66MHZ, per clock rate is 30MHZ.

> > It is caused by calling of flexcan_chip_unfreeze() timeout.
> >
> > Originally the code is using usleep_range(10, 20) for unfreeze
> > operation, but the patch (8badd65 can: flexcan: avoid calling
> > usleep_range from interrupt context) changed it into udelay(10) which
> > is only a half delay of before, there're also some other delay changes.
> >
> > After only changed unfreeze delay back to udelay(20), the issue is gone.
> > So other timeout values are kept the same as 8badd65 changed.
> 
> Can you change FLEXCAN_TIMEOUT_US instead?

  Of course, we can change FLEXCAN_TIMEOUT_US to 100, but this will extend the time of enable/disable/softreset.
Which method do you think is better?

Best Regards,
Joakim Zhang

> > Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > ---
> >  drivers/net/can/flexcan.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> > index 2bca867bcfaa..1d3a9053bbeb 100644
> > --- a/drivers/net/can/flexcan.c
> > +++ b/drivers/net/can/flexcan.c
> > @@ -530,7 +530,7 @@ static int flexcan_chip_unfreeze(struct flexcan_priv
> *priv)
> >  	priv->write(reg, &regs->mcr);
> >
> >  	while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
> > -		udelay(10);
> > +		udelay(20);
> >
> >  	if (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK)
> >  		return -ETIMEDOUT;
> >
> 
> Marc
> 
> --
> Pengutronix e.K.                  | Marc Kleine-Budde           |
> Industrial Linux Solutions        | Phone: +49-231-2826-924     |
> Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
> Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


^ permalink raw reply

* Re: [PATCH v5 bpf-next 1/9] bpf: introduce bpf_spin_lock
From: Peter Zijlstra @ 2019-01-31  8:49 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, davem, daniel, jannh, paulmck, will.deacon,
	mingo, netdev, kernel-team
In-Reply-To: <20190130213418.gxbyfbmuiohn7vj4@ast-mbp.dhcp.thefacebook.com>

On Wed, Jan 30, 2019 at 01:34:19PM -0800, Alexei Starovoitov wrote:
> On Wed, Jan 30, 2019 at 10:05:29PM +0100, Peter Zijlstra wrote:
> > 
> > Would something like the below work for you instead?
> > 
> > I find it easier to read, and the additional CONFIG symbol would give
> > architectures (say ARM) an easy way to force the issue.
> > 
> > 
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -221,6 +221,72 @@ const struct bpf_func_proto bpf_get_curr
> >  	.arg2_type	= ARG_CONST_SIZE,
> >  };
> >  
> > +#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
> > +
> > +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
> > +{
> > +	arch_spinlock_t *l = (void *)lock;
> > +	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
> > +	if (1) {
> > +		union {
> > +			__u32 val;
> > +			arch_spinlock_t lock;
> > +		} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
> > +		compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
> > +	}
> > +	arch_spin_lock(l);
> 
> And archs can select CONFIG_BPF_ARCH_SPINLOCK when they don't
> use qspinlock and their arch_spinlock_t is compatible ?
> Nice. I like the idea!

Exactly, took me a little while to come up with that test for
__ARCH_SPIN_LOCK_UNLOCKED, but it now checks for both assumptions, so no
surprises when people get it wrong by accident.

> > +}
> > +
> > +static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
> > +{
> > +	arch_spinlock_t *l = (void *)lock;
> > +	arch_spin_unlock(l);
> > +}
> > +
> > +#else
> > +
> > +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
> > +{
> > +	atomic_t *l = (void *)lock;
> > +	do {
> > +		atomic_cond_read_relaxed(l, !VAL);
> 
> wow. that's quite a macro magic.

Yeah, C sucks for not having lambdas, this was the best we could come up
with.

This basically allows architectures to optimize the
wait-for-variable-to-change thing. Currently only ARM64 does that, I
have a horrible horrible patch that makes x86 use MONITOR/MWAIT for
this, and I suppose POWER should use it but doesn't.

> Should it be
> atomic_cond_read_relaxed(l, (!VAL));
> like qspinlock.c does ?

Extra parens doesn't hurt of course, but I don't think it's strictly
needed, the atomic_cond_read_*() wrappers already add extra parent
before passing it on to smp_cond_load_*().

^ permalink raw reply

* Re: general protection fault in __xfrm_policy_bysel_ctx
From: Dmitry Vyukov @ 2019-01-31  8:49 UTC (permalink / raw)
  To: Florian Westphal
  Cc: syzbot, David Miller, Herbert Xu, LKML, netdev, Steffen Klassert,
	syzkaller-bugs
In-Reply-To: <CACT4Y+ZWafAqKCt0ubJREcuHx4EiKhNKL0X-et_Y9YZBr8adbg@mail.gmail.com>

On Wed, Jan 30, 2019 at 3:30 PM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Wed, Jan 30, 2019 at 3:20 PM Florian Westphal <fw@strlen.de> wrote:
> >
> > Dmitry Vyukov <dvyukov@google.com> wrote:
> > > > syzbot <syzbot+e6e1fe9148cffa18cf97@syzkaller.appspotmail.com> wrote:
> > > > > Hello,
> > > > >
> > > > > syzbot found the following crash on:
> > > > >
> > > > > HEAD commit:    085c4c7dd2b6 net: lmc: remove -I. header search path
> > > > > git tree:       net-next
> > > > > console output: https://syzkaller.appspot.com/x/log.txt?x=12347128c00000
> > > > > kernel config:  https://syzkaller.appspot.com/x/.config?x=505743eba4e4f68
> > > > > dashboard link: https://syzkaller.appspot.com/bug?extid=e6e1fe9148cffa18cf97
> > > > > compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
> > > > >
> > > > > Unfortunately, I don't have any reproducer for this crash yet.
> > > >
> > > > net-next doesn't contain the fixes for the rbtree fallout yet, so
> > > > this might already be fixed (fingers crossed).
> > >
> > > Hi Florian,
> > >
> > > What is that fix for the record?
> >
> > I don't know.  I managed to add every bug class imagineable in that series 8-(
> >
> > The last (most recent) fix from the 'fallout cleanup' is:
> > 12750abad517a991c4568969bc748db302ab52cd
> > ("xfrm: policy: fix infinite loop when merging src-nodes")
> >
> > so if syzkaller can generate a splat with that change present
> > something is still broken.
> >
> > > We will need to close this later. Or perhaps we can already mark this
> > > as fixed by that patch with "#syz fix:" command?
> >
> > There are a lot of open xfrm related splats that could all be explained
> > by the rbtree bugs (one had a reproducer, the fix has appropriate
> > reported-by tag).
> >
> > It would be great if there was a way to tell syzkaller to report those
> > again if they still appear.
>
> That's exactly what "#syz fix:" will do.
> syzbot will wait until the fixing commit appears in all builds/trees
> it tests, then close this bug, and then any new similarly looking
> crash will produce a new bug report. So if the patch indeed fixes the
> bug, then the bug will be closed and we are done. If it does not fix
> this bug, then we will get another report but at that time on a tree
> that includes the commit.
>
> > I could pretend and claim above commit as "sys-fix", but it seems fishy.
> >
> > Let me know and I can tag all of them.
>
> It's "safe" to mark these crashes as fixed when we are not 100% sure
> in the sense that we won't lose the bug (it will be reported again
> later if it's not fixed).
> It's also useful to keep the list of open/active bugs shorter and more
> precise (don't leave too many obsoleted open bugs). What happened
> multiple times is that a bug was fixed but left open, and then a
> similarly looking crashes started happening again (a new bug), but it
> wasn't reported by syzbot because for syzbot it looked like the old
> still unfixed bug.

Thanks for cleaning it all up!

(Florian updated 17 other bugs)

^ permalink raw reply

* Re: [PATCH v5 bpf-next 1/9] bpf: introduce bpf_spin_lock
From: Peter Zijlstra @ 2019-01-31  8:51 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, davem, daniel, jannh, paulmck, will.deacon,
	mingo, netdev, kernel-team, Nicholas Piggin
In-Reply-To: <20190131084952.GG2296@hirez.programming.kicks-ass.net>

On Thu, Jan 31, 2019 at 09:49:52AM +0100, Peter Zijlstra wrote:
> On Wed, Jan 30, 2019 at 01:34:19PM -0800, Alexei Starovoitov wrote:
> > On Wed, Jan 30, 2019 at 10:05:29PM +0100, Peter Zijlstra wrote:

> > > +static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
> > > +{
> > > +	atomic_t *l = (void *)lock;
> > > +	do {
> > > +		atomic_cond_read_relaxed(l, !VAL);
> > 
> > wow. that's quite a macro magic.
> 
> Yeah, C sucks for not having lambdas, this was the best we could come up
> with.
> 
> This basically allows architectures to optimize the
> wait-for-variable-to-change thing. Currently only ARM64 does that, I
> have a horrible horrible patch that makes x86 use MONITOR/MWAIT for
> this, and I suppose POWER should use it but doesn't.

Nick, do you guys want something like this?

---
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index fbe8df433019..111984c5670d 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -99,6 +99,20 @@ do {									\
 #define barrier_nospec()
 #endif /* CONFIG_PPC_BARRIER_NOSPEC */
 
+#define smp_cond_load_relaxed(ptr, cond_expr) ({		\
+	typeof(ptr) __PTR = (ptr);				\
+	typeof(*ptr) VAL = READ_ONCE(*__PTR);			\
+	if (unlikely(!(cond_expr))) {				\
+		spin_begin();					\
+		do {						\
+			spin_cpu_relax();			\
+			VAL = READ_ONCE(*__PTR);		\
+		} while (!(cond_expr));				\
+		spin_end();					\
+	}							\
+	VAL;							\
+})
+
 #include <asm-generic/barrier.h>
 
 #endif /* _ASM_POWERPC_BARRIER_H */

^ permalink raw reply related

* Re: [PATCH] bpf: selftests: handle sparse CPU allocations
From: Martynas @ 2019-01-31  8:57 UTC (permalink / raw)
  To: Y Song; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann
In-Reply-To: <CAH3MdRUKvPpxmOL_BGjsKeiWTEWCgKNiDKGsXsVzDLbY5F+Dzw@mail.gmail.com>

Oops, thanks for noticing the issue with strlen in the for loop. Changing to "int len = strlen(buff); for (i = 0; i < len; i++) <..>" should fix the issue. I'm going to re-submit the patch.

Also, I put some testing results: https://gist.github.com/brb/5369b5cfd08babb80cf2c4081dc19762

On Thu, Jan 31, 2019, at 7:25 AM, Y Song wrote:
> [ My reply somehow rejected by netdev, this is to send it again. ]
> 
> On Wed, Jan 30, 2019 at 1:19 AM Martynas Pumputis <m@lambda.lt> wrote:
> >
> > Previously, bpf_num_possible_cpus() had a bug when calculating a
> > number of possible CPUs in the case of sparse CPU allocations, as
> > it was considering only the first range or element of
> > /sys/devices/system/cpu/possible.
> >
> > E.g. in the case of "0,2-3" (CPU 1 is not available), the function
> > returned 1 instead of 3.
> >
> > This patch fixes the function by making it parse all CPU ranges and
> > elements.
> >
> > Signed-off-by: Martynas Pumputis <m@lambda.lt>
> > ---
> >  tools/testing/selftests/bpf/bpf_util.h | 29 +++++++++++++++++---------
> >  1 file changed, 19 insertions(+), 10 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
> > index 315a44fa32af..8cab50408204 100644
> > --- a/tools/testing/selftests/bpf/bpf_util.h
> > +++ b/tools/testing/selftests/bpf/bpf_util.h
> > @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
> >         unsigned int start, end, possible_cpus = 0;
> >         char buff[128];
> >         FILE *fp;
> > -       int n;
> > +       int n, i, j = 0;
> >
> >         fp = fopen(fcpu, "r");
> >         if (!fp) {
> > @@ -21,17 +21,26 @@ static inline unsigned int bpf_num_possible_cpus(void)
> >                 exit(1);
> >         }
> >
> > -       while (fgets(buff, sizeof(buff), fp)) {
> > -               n = sscanf(buff, "%u-%u", &start, &end);
> > -               if (n == 0) {
> > -                       printf("Failed to retrieve # possible CPUs!\n");
> > -                       exit(1);
> > -               } else if (n == 1) {
> > -                       end = start;
> > +       if (!fgets(buff, sizeof(buff), fp)) {
> > +               printf("Failed to read %s!\n", fcpu);
> > +               exit(1);
> > +       }
> > +
> > +       for (i = 0; i <= strlen(buff); i++) {
> > +               if (buff[i] == ',' || buff[i] == '\0') {
> > +                       buff[i] = '\0';
> 
> This does not sound right. For example, the cpu list "0,2-3",
> you will change "," to '\0" so buffer becomes "0\02-3".
> The next iteration you will get strlen(buff) = 1.
> The "2-3" will be skipped.
> 
> > +                       n = sscanf(&buff[j], "%u-%u", &start, &end);
> > +                       if (n <= 0) {
> > +                               printf("Failed to retrieve # possible CPUs!\n");
> > +                               exit(1);
> > +                       } else if (n == 1) {
> > +                               end = start;
> > +                       }
> > +                       possible_cpus += end - start + 1;
> > +                       j = i + 1;
> >                 }
> > -               possible_cpus = start == 0 ? end + 1 : 0;
> > -               break;
> >         }
> > +
> >         fclose(fp);
> >
> >         return possible_cpus;
> > --
> > 2.20.1
> >

^ permalink raw reply

* RE: [PATCH] can: flexcan: fix timeout when set small bitrate
From: Aisheng Dong @ 2019-01-31  9:09 UTC (permalink / raw)
  To: Joakim Zhang, Marc Kleine-Budde, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, dl-linux-imx
In-Reply-To: <DB7PR04MB46189FE45531AA8FD65D5327E6910@DB7PR04MB4618.eurprd04.prod.outlook.com>

> From: Joakim Zhang
> Sent: Thursday, January 31, 2019 4:49 PM
[...]
> > > Current we can meet timeout issue when setting a small bitrate like
> > > 10000 as follows:
> > > root@imx6qdlsolo:~# ip link set can0 up type can bitrate 10000 A
> > > link change request failed with some changes committed already.
> > > Interface can0 may have been left with an inconsistent
> > > configuration, please check.
> > > RTNETLINK answers: Connection timed out
> >
> > Which SoC are you using? Which clock rate has the flexcan IP core?
> 
>   We tested on i.MX6 series boards and all met this issue. And ipg clock rate is
> 66MHZ, per clock rate is 30MHZ.
> 
> > > It is caused by calling of flexcan_chip_unfreeze() timeout.
> > >
> > > Originally the code is using usleep_range(10, 20) for unfreeze
> > > operation, but the patch (8badd65 can: flexcan: avoid calling
> > > usleep_range from interrupt context) changed it into udelay(10)
> > > which is only a half delay of before, there're also some other delay
> changes.
> > >
> > > After only changed unfreeze delay back to udelay(20), the issue is gone.
> > > So other timeout values are kept the same as 8badd65 changed.
> >
> > Can you change FLEXCAN_TIMEOUT_US instead?
> 
>   Of course, we can change FLEXCAN_TIMEOUT_US to 100, but this will
> extend the time of enable/disable/softreset.
> Which method do you think is better?
> 

That seems not a big deal for an error case.
So change FLEXCAN_TIMEOUT_US seems like a good suggestion to me.
You can cook a patch with commit message updated. No need keep my author name
as it's a different solution.

Regards
Dong Aisheng

> Best Regards,
> Joakim Zhang
> 
> > > Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
> > > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > > ---
> > >  drivers/net/can/flexcan.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> > > index 2bca867bcfaa..1d3a9053bbeb 100644
> > > --- a/drivers/net/can/flexcan.c
> > > +++ b/drivers/net/can/flexcan.c
> > > @@ -530,7 +530,7 @@ static int flexcan_chip_unfreeze(struct
> > > flexcan_priv
> > *priv)
> > >  	priv->write(reg, &regs->mcr);
> > >
> > >  	while (timeout-- && (priv->read(&regs->mcr) &
> FLEXCAN_MCR_FRZ_ACK))
> > > -		udelay(10);
> > > +		udelay(20);
> > >
> > >  	if (priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK)
> > >  		return -ETIMEDOUT;
> > >
> >
> > Marc
> >
> > --
> > Pengutronix e.K.                  | Marc Kleine-Budde           |
> > Industrial Linux Solutions        | Phone: +49-231-2826-924     |
> > Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
> > Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox