Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH bpf-next 1/6] bpf: btf: Avoid WARN_ON when CONFIG_REFCOUNT_FULL=y
From: Martin KaFai Lau @ 2018-05-04 21:49 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180504214955.1058805-1-kafai@fb.com>

If CONFIG_REFCOUNT_FULL=y, refcount_inc() WARN when refcount is 0.
When creating a new btf, the initial btf->refcnt is 0 and
triggered the following:

[   34.855452] refcount_t: increment on 0; use-after-free.
[   34.856252] WARNING: CPU: 6 PID: 1857 at lib/refcount.c:153 refcount_inc+0x26/0x30
....
[   34.868809] Call Trace:
[   34.869168]  btf_new_fd+0x1af6/0x24d0
[   34.869645]  ? btf_type_seq_show+0x200/0x200
[   34.870212]  ? lock_acquire+0x3b0/0x3b0
[   34.870726]  ? security_capable+0x54/0x90
[   34.871247]  __x64_sys_bpf+0x1b2/0x310
[   34.871761]  ? __ia32_sys_bpf+0x310/0x310
[   34.872285]  ? bad_area_access_error+0x310/0x310
[   34.872894]  do_syscall_64+0x95/0x3f0

This patch uses refcount_set() instead.

Reported-by: Yonghong Song <yhs@fb.com>
Tested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 kernel/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 22e1046a1a86..fa0dce0452e7 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1977,7 +1977,7 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
 
 	if (!err) {
 		btf_verifier_env_free(env);
-		btf_get(btf);
+		refcount_set(&btf->refcnt, 1);
 		return btf;
 	}
 
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 3/6] bpf: btf: Add struct bpf_btf_info
From: Martin KaFai Lau @ 2018-05-04 21:49 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180504214955.1058805-1-kafai@fb.com>

During BPF_OBJ_GET_INFO_BY_FD on a btf_fd, the current bpf_attr's
info.info is directly filled with the BTF binary data.  It is
not extensible.  In this case, we want to add BTF ID.

This patch adds "struct bpf_btf_info" which has the BTF ID as
one of its member.  The BTF binary data itself is exposed through
the "btf" and "btf_size" members.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
---
 include/uapi/linux/bpf.h |  6 ++++++
 kernel/bpf/btf.c         | 26 +++++++++++++++++++++-----
 kernel/bpf/syscall.c     | 17 ++++++++++++++++-
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6106f23a9a8a..d615c777b573 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2137,6 +2137,12 @@ struct bpf_map_info {
 	__u32 btf_value_id;
 } __attribute__((aligned(8)));
 
+struct bpf_btf_info {
+	__aligned_u64 btf;
+	__u32 btf_size;
+	__u32 id;
+} __attribute__((aligned(8)));
+
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
  * by user and intended to be used by socket (e.g. to bind to, depends on
  * attach attach type).
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 40950b6bf395..ded10ab47b8a 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2114,12 +2114,28 @@ int btf_get_info_by_fd(const struct btf *btf,
 		       const union bpf_attr *attr,
 		       union bpf_attr __user *uattr)
 {
-	void __user *udata = u64_to_user_ptr(attr->info.info);
-	u32 copy_len = min_t(u32, btf->data_size,
-			     attr->info.info_len);
+	struct bpf_btf_info __user *uinfo;
+	struct bpf_btf_info info = {};
+	u32 info_copy, btf_copy;
+	void __user *ubtf;
+	u32 uinfo_len;
 
-	if (copy_to_user(udata, btf->data, copy_len) ||
-	    put_user(btf->data_size, &uattr->info.info_len))
+	uinfo = u64_to_user_ptr(attr->info.info);
+	uinfo_len = attr->info.info_len;
+
+	info_copy = min_t(u32, uinfo_len, sizeof(info));
+	if (copy_from_user(&info, uinfo, info_copy))
+		return -EFAULT;
+
+	info.id = btf->id;
+	ubtf = u64_to_user_ptr(info.btf);
+	btf_copy = min_t(u32, btf->data_size, info.btf_size);
+	if (copy_to_user(ubtf, btf->data, btf_copy))
+		return -EFAULT;
+	info.btf_size = btf->data_size;
+
+	if (copy_to_user(uinfo, &info, info_copy) ||
+	    put_user(info_copy, &uattr->info.info_len))
 		return -EFAULT;
 
 	return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8b0a45d65454..d2895e3e5cbf 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2019,6 +2019,21 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
 	return 0;
 }
 
+static int bpf_btf_get_info_by_fd(struct btf *btf,
+				  const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+	u32 info_len = attr->info.info_len;
+	int err;
+
+	err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
+	if (err)
+		return err;
+
+	return btf_get_info_by_fd(btf, attr, uattr);
+}
+
 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
 
 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
@@ -2042,7 +2057,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
 					     uattr);
 	else if (f.file->f_op == &btf_fops)
-		err = btf_get_info_by_fd(f.file->private_data, attr, uattr);
+		err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
 	else
 		err = -EINVAL;
 
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 4/6] bpf: btf: Some test_btf clean up
From: Martin KaFai Lau @ 2018-05-04 21:49 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180504214955.1058805-1-kafai@fb.com>

This patch adds a CHECK() macro for condition checking
and error report purpose.  Something similar to test_progs.c

It also counts the number of tests passed/skipped/failed and
print them at the end of the test run.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
---
 tools/testing/selftests/bpf/test_btf.c | 201 ++++++++++++++++-----------------
 1 file changed, 99 insertions(+), 102 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 7b39b1f712a1..b7880a20fad1 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -20,6 +20,30 @@
 
 #include "bpf_rlimit.h"
 
+static uint32_t pass_cnt;
+static uint32_t error_cnt;
+static uint32_t skip_cnt;
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+static int count_result(int err)
+{
+	if (err)
+		error_cnt++;
+	else
+		pass_cnt++;
+
+	fprintf(stderr, "\n");
+	return err;
+}
+
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 
@@ -894,17 +918,13 @@ static void *btf_raw_create(const struct btf_header *hdr,
 	void *raw_btf;
 
 	type_sec_size = get_type_sec_size(raw_types);
-	if (type_sec_size < 0) {
-		fprintf(stderr, "Cannot get nr_raw_types\n");
+	if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
 		return NULL;
-	}
 
 	size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
 	raw_btf = malloc(size_needed);
-	if (!raw_btf) {
-		fprintf(stderr, "Cannot allocate memory for raw_btf\n");
+	if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
 		return NULL;
-	}
 
 	/* Copy header */
 	memcpy(raw_btf, hdr, sizeof(*hdr));
@@ -915,8 +935,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
 	for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
 		if (raw_types[i] == NAME_TBD) {
 			next_str = get_next_str(next_str, end_str);
-			if (!next_str) {
-				fprintf(stderr, "Error in getting next_str\n");
+			if (CHECK(!next_str, "Error in getting next_str")) {
 				free(raw_btf);
 				return NULL;
 			}
@@ -973,9 +992,8 @@ static int do_test_raw(unsigned int test_num)
 	free(raw_btf);
 
 	err = ((btf_fd == -1) != test->btf_load_err);
-	if (err)
-		fprintf(stderr, "btf_load_err:%d btf_fd:%d\n",
-			test->btf_load_err, btf_fd);
+	CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+	      btf_fd, test->btf_load_err);
 
 	if (err || btf_fd == -1)
 		goto done;
@@ -992,16 +1010,15 @@ static int do_test_raw(unsigned int test_num)
 	map_fd = bpf_create_map_xattr(&create_attr);
 
 	err = ((map_fd == -1) != test->map_create_err);
-	if (err)
-		fprintf(stderr, "map_create_err:%d map_fd:%d\n",
-			test->map_create_err, map_fd);
+	CHECK(err, "map_fd:%d test->map_create_err:%u",
+	      map_fd, test->map_create_err);
 
 done:
 	if (!err)
-		fprintf(stderr, "OK\n");
+		fprintf(stderr, "OK");
 
 	if (*btf_log_buf && (err || args.always_log))
-		fprintf(stderr, "%s\n", btf_log_buf);
+		fprintf(stderr, "\n%s", btf_log_buf);
 
 	if (btf_fd != -1)
 		close(btf_fd);
@@ -1017,10 +1034,10 @@ static int test_raw(void)
 	int err = 0;
 
 	if (args.raw_test_num)
-		return do_test_raw(args.raw_test_num);
+		return count_result(do_test_raw(args.raw_test_num));
 
 	for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
-		err |= do_test_raw(i);
+		err |= count_result(do_test_raw(i));
 
 	return err;
 }
@@ -1080,8 +1097,7 @@ static int do_test_get_info(unsigned int test_num)
 	*btf_log_buf = '\0';
 
 	user_btf = malloc(raw_btf_size);
-	if (!user_btf) {
-		fprintf(stderr, "Cannot allocate memory for user_btf\n");
+	if (CHECK(!user_btf, "!user_btf")) {
 		err = -1;
 		goto done;
 	}
@@ -1089,9 +1105,7 @@ static int do_test_get_info(unsigned int test_num)
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
 			      args.always_log);
-	if (btf_fd == -1) {
-		fprintf(stderr, "bpf_load_btf:%s(%d)\n",
-			strerror(errno), errno);
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -1103,31 +1117,31 @@ static int do_test_get_info(unsigned int test_num)
 		       raw_btf_size - expected_nbytes);
 
 	err = bpf_obj_get_info_by_fd(btf_fd, user_btf, &user_btf_size);
-	if (err || user_btf_size != raw_btf_size ||
-	    memcmp(raw_btf, user_btf, expected_nbytes)) {
-		fprintf(stderr,
-			"err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d\n",
-			err, errno,
-			raw_btf_size, user_btf_size, expected_nbytes,
-			memcmp(raw_btf, user_btf, expected_nbytes));
+	if (CHECK(err || user_btf_size != raw_btf_size ||
+		  memcmp(raw_btf, user_btf, expected_nbytes),
+		  "err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d",
+		  err, errno,
+		  raw_btf_size, user_btf_size, expected_nbytes,
+		  memcmp(raw_btf, user_btf, expected_nbytes))) {
 		err = -1;
 		goto done;
 	}
 
 	while (expected_nbytes < raw_btf_size) {
 		fprintf(stderr, "%u...", expected_nbytes);
-		if (user_btf[expected_nbytes++] != 0xff) {
-			fprintf(stderr, "!= 0xff\n");
+		if (CHECK(user_btf[expected_nbytes++] != 0xff,
+			  "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
+			  user_btf[expected_nbytes - 1])) {
 			err = -1;
 			goto done;
 		}
 	}
 
-	fprintf(stderr, "OK\n");
+	fprintf(stderr, "OK");
 
 done:
 	if (*btf_log_buf && (err || args.always_log))
-		fprintf(stderr, "%s\n", btf_log_buf);
+		fprintf(stderr, "\n%s", btf_log_buf);
 
 	free(raw_btf);
 	free(user_btf);
@@ -1144,10 +1158,10 @@ static int test_get_info(void)
 	int err = 0;
 
 	if (args.get_info_test_num)
-		return do_test_get_info(args.get_info_test_num);
+		return count_result(do_test_get_info(args.get_info_test_num));
 
 	for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
-		err |= do_test_get_info(i);
+		err |= count_result(do_test_get_info(i));
 
 	return err;
 }
@@ -1175,28 +1189,21 @@ static int file_has_btf_elf(const char *fn)
 	Elf *elf;
 	int ret;
 
-	if (elf_version(EV_CURRENT) == EV_NONE) {
-		fprintf(stderr, "Failed to init libelf\n");
+	if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
+		  "elf_version(EV_CURRENT) == EV_NONE"))
 		return -1;
-	}
 
 	elf_fd = open(fn, O_RDONLY);
-	if (elf_fd == -1) {
-		fprintf(stderr, "Cannot open file %s: %s(%d)\n",
-			fn, strerror(errno), errno);
+	if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
 		return -1;
-	}
 
 	elf = elf_begin(elf_fd, ELF_C_READ, NULL);
-	if (!elf) {
-		fprintf(stderr, "Failed to read ELF from %s. %s\n", fn,
-			elf_errmsg(elf_errno()));
+	if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
 		ret = -1;
 		goto done;
 	}
 
-	if (!gelf_getehdr(elf, &ehdr)) {
-		fprintf(stderr, "Failed to get EHDR from %s\n", fn);
+	if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
 		ret = -1;
 		goto done;
 	}
@@ -1205,9 +1212,8 @@ static int file_has_btf_elf(const char *fn)
 		const char *sh_name;
 		GElf_Shdr sh;
 
-		if (gelf_getshdr(scn, &sh) != &sh) {
-			fprintf(stderr,
-				"Failed to get section header from %s\n", fn);
+		if (CHECK(gelf_getshdr(scn, &sh) != &sh,
+			  "file:%s gelf_getshdr != &sh", fn)) {
 			ret = -1;
 			goto done;
 		}
@@ -1243,53 +1249,44 @@ static int do_test_file(unsigned int test_num)
 		return err;
 
 	if (err == 0) {
-		fprintf(stderr, "SKIP. No ELF %s found\n", BTF_ELF_SEC);
+		fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+		skip_cnt++;
 		return 0;
 	}
 
 	obj = bpf_object__open(test->file);
-	if (IS_ERR(obj))
+	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
 		return PTR_ERR(obj);
 
 	err = bpf_object__btf_fd(obj);
-	if (err == -1) {
-		fprintf(stderr, "bpf_object__btf_fd: -1\n");
+	if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
 		goto done;
-	}
 
 	prog = bpf_program__next(NULL, obj);
-	if (!prog) {
-		fprintf(stderr, "Cannot find bpf_prog\n");
+	if (CHECK(!prog, "Cannot find bpf_prog")) {
 		err = -1;
 		goto done;
 	}
 
 	bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
 	err = bpf_object__load(obj);
-	if (err < 0) {
-		fprintf(stderr, "bpf_object__load: %d\n", err);
+	if (CHECK(err < 0, "bpf_object__load: %d", err))
 		goto done;
-	}
 
 	map = bpf_object__find_map_by_name(obj, "btf_map");
-	if (!map) {
-		fprintf(stderr, "btf_map not found\n");
+	if (CHECK(!map, "btf_map not found")) {
 		err = -1;
 		goto done;
 	}
 
 	err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
 		!= test->btf_kv_notfound;
-	if (err) {
-		fprintf(stderr,
-			"btf_kv_notfound:%u btf_key_id:%u btf_value_id:%u\n",
-			test->btf_kv_notfound,
-			bpf_map__btf_key_id(map),
-			bpf_map__btf_value_id(map));
+	if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
+		  bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
+		  test->btf_kv_notfound))
 		goto done;
-	}
 
-	fprintf(stderr, "OK\n");
+	fprintf(stderr, "OK");
 
 done:
 	bpf_object__close(obj);
@@ -1302,10 +1299,10 @@ static int test_file(void)
 	int err = 0;
 
 	if (args.file_test_num)
-		return do_test_file(args.file_test_num);
+		return count_result(do_test_file(args.file_test_num));
 
 	for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
-		err |= do_test_file(i);
+		err |= count_result(do_test_file(i));
 
 	return err;
 }
@@ -1425,7 +1422,7 @@ static int test_pprint(void)
 	unsigned int key;
 	uint8_t *raw_btf;
 	ssize_t nread;
-	int err;
+	int err, ret;
 
 	fprintf(stderr, "%s......", test->descr);
 	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
@@ -1441,10 +1438,8 @@ static int test_pprint(void)
 			      args.always_log);
 	free(raw_btf);
 
-	if (btf_fd == -1) {
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
 		err = -1;
-		fprintf(stderr, "bpf_load_btf: %s(%d)\n",
-			strerror(errno), errno);
 		goto done;
 	}
 
@@ -1458,26 +1453,23 @@ static int test_pprint(void)
 	create_attr.btf_value_id = test->value_id;
 
 	map_fd = bpf_create_map_xattr(&create_attr);
-	if (map_fd == -1) {
+	if (CHECK(map_fd == -1, "errno:%d", errno)) {
 		err = -1;
-		fprintf(stderr, "bpf_creat_map_btf: %s(%d)\n",
-			strerror(errno), errno);
 		goto done;
 	}
 
-	if (snprintf(pin_path, sizeof(pin_path), "%s/%s",
-		     "/sys/fs/bpf", test->map_name) == sizeof(pin_path)) {
+	ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+		       "/sys/fs/bpf", test->map_name);
+
+	if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+		  "/sys/fs/bpf", test->map_name)) {
 		err = -1;
-		fprintf(stderr, "pin_path is too long\n");
 		goto done;
 	}
 
 	err = bpf_obj_pin(map_fd, pin_path);
-	if (err) {
-		fprintf(stderr, "Cannot pin to %s. %s(%d).\n", pin_path,
-			strerror(errno), errno);
+	if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
 		goto done;
-	}
 
 	for (key = 0; key < test->max_entries; key++) {
 		set_pprint_mapv(&mapv, key);
@@ -1485,10 +1477,8 @@ static int test_pprint(void)
 	}
 
 	pin_file = fopen(pin_path, "r");
-	if (!pin_file) {
+	if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
 		err = -1;
-		fprintf(stderr, "fopen(%s): %s(%d)\n", pin_path,
-			strerror(errno), errno);
 		goto done;
 	}
 
@@ -1497,9 +1487,8 @@ static int test_pprint(void)
 	       *line == '#')
 		;
 
-	if (nread <= 0) {
+	if (CHECK(nread <= 0, "Unexpected EOF")) {
 		err = -1;
-		fprintf(stderr, "Unexpected EOF\n");
 		goto done;
 	}
 
@@ -1518,9 +1507,9 @@ static int test_pprint(void)
 					  mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
 					  pprint_enum_str[mapv.aenum]);
 
-		if (nexpected_line == sizeof(expected_line)) {
+		if (CHECK(nexpected_line == sizeof(expected_line),
+			  "expected_line is too long")) {
 			err = -1;
-			fprintf(stderr, "expected_line is too long\n");
 			goto done;
 		}
 
@@ -1535,15 +1524,15 @@ static int test_pprint(void)
 		nread = getline(&line, &line_len, pin_file);
 	} while (++key < test->max_entries && nread > 0);
 
-	if (key < test->max_entries) {
+	if (CHECK(key < test->max_entries,
+		  "Unexpected EOF. key:%u test->max_entries:%u",
+		  key, test->max_entries)) {
 		err = -1;
-		fprintf(stderr, "Unexpected EOF\n");
 		goto done;
 	}
 
-	if (nread > 0) {
+	if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
 		err = -1;
-		fprintf(stderr, "Unexpected extra pprint output: %s\n", line);
 		goto done;
 	}
 
@@ -1551,9 +1540,9 @@ static int test_pprint(void)
 
 done:
 	if (!err)
-		fprintf(stderr, "OK\n");
+		fprintf(stderr, "OK");
 	if (*btf_log_buf && (err || args.always_log))
-		fprintf(stderr, "%s\n", btf_log_buf);
+		fprintf(stderr, "\n%s", btf_log_buf);
 	if (btf_fd != -1)
 		close(btf_fd);
 	if (map_fd != -1)
@@ -1634,6 +1623,12 @@ static int parse_args(int argc, char **argv)
 	return 0;
 }
 
+static void print_summary(void)
+{
+	fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
+		pass_cnt - skip_cnt, skip_cnt, error_cnt);
+}
+
 int main(int argc, char **argv)
 {
 	int err = 0;
@@ -1655,15 +1650,17 @@ int main(int argc, char **argv)
 		err |= test_file();
 
 	if (args.pprint_test)
-		err |= test_pprint();
+		err |= count_result(test_pprint());
 
 	if (args.raw_test || args.get_info_test || args.file_test ||
 	    args.pprint_test)
-		return err;
+		goto done;
 
 	err |= test_raw();
 	err |= test_get_info();
 	err |= test_file();
 
+done:
+	print_summary();
 	return err;
 }
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 6/6] bpf: btf: Tests for BPF_OBJ_GET_INFO_BY_FD and BPF_BTF_GET_FD_BY_ID
From: Martin KaFai Lau @ 2018-05-04 21:49 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180504214955.1058805-1-kafai@fb.com>

This patch adds test for BPF_BTF_GET_FD_BY_ID and the new
btf_id/btf_key_id/btf_value_id in the "struct bpf_map_info".

It also modifies the existing BPF_OBJ_GET_INFO_BY_FD test
to reflect the new "struct bpf_btf_info".

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
---
 tools/lib/bpf/bpf.c                    |  10 ++
 tools/lib/bpf/bpf.h                    |   1 +
 tools/testing/selftests/bpf/test_btf.c | 289 +++++++++++++++++++++++++++++++--
 3 files changed, 287 insertions(+), 13 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 76b36cc16e7f..a3a8fb2ac697 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -458,6 +458,16 @@ int bpf_map_get_fd_by_id(__u32 id)
 	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
 }
 
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.btf_id = id;
+
+	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 {
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 553b11ad52b3..fb3a146d92ff 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -98,6 +98,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
 int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
 int bpf_prog_get_fd_by_id(__u32 id);
 int bpf_map_get_fd_by_id(__u32 id);
+int bpf_btf_get_fd_by_id(__u32 id);
 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index b7880a20fad1..c8bceae7ec02 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1047,9 +1047,13 @@ struct btf_get_info_test {
 	const char *str_sec;
 	__u32 raw_types[MAX_NR_RAW_TYPES];
 	__u32 str_sec_size;
-	int info_size_delta;
+	int btf_size_delta;
+	int (*special_test)(unsigned int test_num);
 };
 
+static int test_big_btf_info(unsigned int test_num);
+static int test_btf_id(unsigned int test_num);
+
 const struct btf_get_info_test get_info_tests[] = {
 {
 	.descr = "== raw_btf_size+1",
@@ -1060,7 +1064,7 @@ const struct btf_get_info_test get_info_tests[] = {
 	},
 	.str_sec = "",
 	.str_sec_size = sizeof(""),
-	.info_size_delta = 1,
+	.btf_size_delta = 1,
 },
 {
 	.descr = "== raw_btf_size-3",
@@ -1071,20 +1075,274 @@ const struct btf_get_info_test get_info_tests[] = {
 	},
 	.str_sec = "",
 	.str_sec_size = sizeof(""),
-	.info_size_delta = -3,
+	.btf_size_delta = -3,
+},
+{
+	.descr = "Large bpf_btf_info",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.special_test = test_big_btf_info,
+},
+{
+	.descr = "BTF ID",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned int */			/* [2] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.special_test = test_btf_id,
 },
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static int test_big_btf_info(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	uint8_t *raw_btf = NULL, *user_btf = NULL;
+	unsigned int raw_btf_size;
+	struct {
+		struct bpf_btf_info info;
+		uint64_t garbage;
+	} info_garbage;
+	struct bpf_btf_info *info;
+	int btf_fd = -1, err;
+	uint32_t info_len;
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	user_btf = malloc(raw_btf_size);
+	if (CHECK(!user_btf, "!user_btf")) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/*
+	 * GET_INFO should error out if the userspace info
+	 * has non zero tailing bytes.
+	 */
+	info = &info_garbage.info;
+	memset(info, 0, sizeof(*info));
+	info_garbage.garbage = 0xdeadbeef;
+	info_len = sizeof(info_garbage);
+	info->btf = ptr_to_u64(user_btf);
+	info->btf_size = raw_btf_size;
+
+	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	if (CHECK(!err, "!err")) {
+		err = -1;
+		goto done;
+	}
+
+	/*
+	 * GET_INFO should succeed even info_len is larger than
+	 * the kernel supported as long as tailing bytes are zero.
+	 * The kernel supported info len should also be returned
+	 * to userspace.
+	 */
+	info_garbage.garbage = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	if (CHECK(err || info_len != sizeof(*info),
+		  "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+		  err, errno, info_len, sizeof(*info))) {
+		err = -1;
+		goto done;
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	free(user_btf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+
+	return err;
+}
+
+static int test_btf_id(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	struct bpf_create_map_attr create_attr = {};
+	uint8_t *raw_btf = NULL, *user_btf[2] = {};
+	int btf_fd[2] = {-1, -1}, map_fd = -1;
+	struct bpf_map_info map_info = {};
+	struct bpf_btf_info info[2] = {};
+	unsigned int raw_btf_size;
+	uint32_t info_len;
+	int err, i, ret;
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	for (i = 0; i < 2; i++) {
+		user_btf[i] = malloc(raw_btf_size);
+		if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
+			err = -1;
+			goto done;
+		}
+		info[i].btf = ptr_to_u64(user_btf[i]);
+		info[i].btf_size = raw_btf_size;
+	}
+
+	btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
+				 btf_log_buf, BTF_LOG_BUF_SIZE,
+				 args.always_log);
+	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
+	info_len = sizeof(info[0]);
+	err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+	if (CHECK(err, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
+	if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	ret = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+	if (CHECK(err || info[0].id != info[1].id ||
+		  info[0].btf_size != info[1].btf_size ||
+		  (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
+		  "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
+		  err, errno, info[0].id, info[1].id,
+		  info[0].btf_size, info[1].btf_size, ret)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Test btf members in struct bpf_map_info */
+	create_attr.name = "test_btf_id";
+	create_attr.map_type = BPF_MAP_TYPE_ARRAY;
+	create_attr.key_size = sizeof(int);
+	create_attr.value_size = sizeof(unsigned int);
+	create_attr.max_entries = 4;
+	create_attr.btf_fd = btf_fd[0];
+	create_attr.btf_key_id = 1;
+	create_attr.btf_value_id = 2;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	info_len = sizeof(map_info);
+	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	if (CHECK(err || map_info.btf_id != info[0].id ||
+		  map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
+		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
+		  err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
+		  map_info.btf_value_id)) {
+		err = -1;
+		goto done;
+	}
+
+	for (i = 0; i < 2; i++) {
+		close(btf_fd[i]);
+		btf_fd[i] = -1;
+	}
+
+	/* Test BTF ID is removed from the kernel */
+	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+	close(btf_fd[0]);
+	btf_fd[0] = -1;
+
+	/* The map holds the last ref to BTF and its btf_id */
+	close(map_fd);
+	map_fd = -1;
+	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+	if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+		err = -1;
+		goto done;
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	if (map_fd != -1)
+		close(map_fd);
+	for (i = 0; i < 2; i++) {
+		free(user_btf[i]);
+		if (btf_fd[i] != -1)
+			close(btf_fd[i]);
+	}
+
+	return err;
+}
+
 static int do_test_get_info(unsigned int test_num)
 {
 	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
 	unsigned int raw_btf_size, user_btf_size, expected_nbytes;
 	uint8_t *raw_btf = NULL, *user_btf = NULL;
-	int btf_fd = -1, err;
+	struct bpf_btf_info info = {};
+	int btf_fd = -1, err, ret;
+	uint32_t info_len;
 
-	fprintf(stderr, "BTF GET_INFO_BY_ID test[%u] (%s): ",
+	fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
 		test_num, test->descr);
 
+	if (test->special_test)
+		return test->special_test(test_num);
+
 	raw_btf = btf_raw_create(&hdr_tmpl,
 				 test->raw_types,
 				 test->str_sec,
@@ -1110,19 +1368,24 @@ static int do_test_get_info(unsigned int test_num)
 		goto done;
 	}
 
-	user_btf_size = (int)raw_btf_size + test->info_size_delta;
+	user_btf_size = (int)raw_btf_size + test->btf_size_delta;
 	expected_nbytes = min(raw_btf_size, user_btf_size);
 	if (raw_btf_size > expected_nbytes)
 		memset(user_btf + expected_nbytes, 0xff,
 		       raw_btf_size - expected_nbytes);
 
-	err = bpf_obj_get_info_by_fd(btf_fd, user_btf, &user_btf_size);
-	if (CHECK(err || user_btf_size != raw_btf_size ||
-		  memcmp(raw_btf, user_btf, expected_nbytes),
-		  "err:%d(errno:%d) raw_btf_size:%u user_btf_size:%u expected_nbytes:%u memcmp:%d",
-		  err, errno,
-		  raw_btf_size, user_btf_size, expected_nbytes,
-		  memcmp(raw_btf, user_btf, expected_nbytes))) {
+	info_len = sizeof(info);
+	info.btf = ptr_to_u64(user_btf);
+	info.btf_size = user_btf_size;
+
+	ret = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+	if (CHECK(err || !info.id || info_len != sizeof(info) ||
+		  info.btf_size != raw_btf_size ||
+		  (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
+		  "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+		  err, errno, info.id, info_len, sizeof(info),
+		  raw_btf_size, info.btf_size, expected_nbytes, ret)) {
 		err = -1;
 		goto done;
 	}
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 0/6] Introduce BTF ID
From: Martin KaFai Lau @ 2018-05-04 21:49 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

This series introduces BTF ID which is exposed through
the new BPF_BTF_GET_FD_BY_ID cmd, new "struct bpf_btf_info"
and new members in the "struct bpf_map_info".

Please see individual patch for details.

Martin KaFai Lau (6):
  bpf: btf: Avoid WARN_ON when CONFIG_REFCOUNT_FULL=y
  bpf: btf: Introduce BTF ID
  bpf: btf: Add struct bpf_btf_info
  bpf: btf: Some test_btf clean up
  bpf: btf: Update tools/include/uapi/linux/btf.h with BTF ID
  bpf: btf: Tests for BPF_OBJ_GET_INFO_BY_FD and BPF_BTF_GET_FD_BY_ID

 include/linux/btf.h                    |   2 +
 include/uapi/linux/bpf.h               |  11 +
 kernel/bpf/btf.c                       | 136 ++++++++--
 kernel/bpf/syscall.c                   |  41 ++-
 tools/include/uapi/linux/bpf.h         |  11 +
 tools/lib/bpf/bpf.c                    |  10 +
 tools/lib/bpf/bpf.h                    |   1 +
 tools/testing/selftests/bpf/test_btf.c | 478 +++++++++++++++++++++++++--------
 8 files changed, 563 insertions(+), 127 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [PATCH bpf-next 2/6] bpf: btf: Introduce BTF ID
From: Martin KaFai Lau @ 2018-05-04 21:49 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180504214955.1058805-1-kafai@fb.com>

This patch gives an ID to each loaded BTF.  The ID is allocated by
the idr like the existing prog-id and map-id.

The bpf_put(map->btf) is moved to __bpf_map_put() so that the
userspace can stop seeing the BTF ID ASAP when the last BTF
refcnt is gone.

It also makes BTF accessible from userspace through the
1. new BPF_BTF_GET_FD_BY_ID command.  It is limited to CAP_SYS_ADMIN
   which is inline with the BPF_BTF_LOAD cmd and the existing
   BPF_[MAP|PROG]_GET_FD_BY_ID cmd.
2. new btf_id (and btf_key_id + btf_value_id) in "struct bpf_map_info"

Once the BTF ID handler is accessible from userspace, freeing a BTF
object has to go through a rcu period.  The BPF_BTF_GET_FD_BY_ID cmd
can then be done under a rcu_read_lock() instead of taking
spin_lock.
[Note: A similar rcu usage can be done to the existing
       bpf_prog_get_fd_by_id() in a follow up patch]

When processing the BPF_BTF_GET_FD_BY_ID cmd,
refcount_inc_not_zero() is needed because the BTF object
could be already in the rcu dead row .  btf_get() is
removed since its usage is currently limited to btf.c
alone.  refcount_inc() is used directly instead.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
---
 include/linux/btf.h      |   2 +
 include/uapi/linux/bpf.h |   5 +++
 kernel/bpf/btf.c         | 108 ++++++++++++++++++++++++++++++++++++++++++-----
 kernel/bpf/syscall.c     |  24 ++++++++++-
 4 files changed, 128 insertions(+), 11 deletions(-)

diff --git a/include/linux/btf.h b/include/linux/btf.h
index a966dc6d61ee..e076c4697049 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -44,5 +44,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
 					u32 *ret_size);
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
 		       struct seq_file *m);
+int btf_get_fd_by_id(u32 id);
+u32 btf_id(const struct btf *btf);
 
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 93d5a4eeec2a..6106f23a9a8a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -96,6 +96,7 @@ enum bpf_cmd {
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
 	BPF_BTF_LOAD,
+	BPF_BTF_GET_FD_BY_ID,
 };
 
 enum bpf_map_type {
@@ -344,6 +345,7 @@ union bpf_attr {
 			__u32		start_id;
 			__u32		prog_id;
 			__u32		map_id;
+			__u32		btf_id;
 		};
 		__u32		next_id;
 		__u32		open_flags;
@@ -2130,6 +2132,9 @@ struct bpf_map_info {
 	__u32 ifindex;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 btf_id;
+	__u32 btf_key_id;
+	__u32 btf_value_id;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index fa0dce0452e7..40950b6bf395 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -11,6 +11,7 @@
 #include <linux/file.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
+#include <linux/idr.h>
 #include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 
@@ -179,6 +180,9 @@
 	     i < btf_type_vlen(struct_type);				\
 	     i++, member++)
 
+static DEFINE_IDR(btf_idr);
+static DEFINE_SPINLOCK(btf_idr_lock);
+
 struct btf {
 	union {
 		struct btf_header *hdr;
@@ -193,6 +197,8 @@ struct btf {
 	u32 types_size;
 	u32 data_size;
 	refcount_t refcnt;
+	u32 id;
+	struct rcu_head rcu;
 };
 
 enum verifier_phase {
@@ -598,6 +604,42 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
 	return 0;
 }
 
+static int btf_alloc_id(struct btf *btf)
+{
+	int id;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_bh(&btf_idr_lock);
+	id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
+	if (id > 0)
+		btf->id = id;
+	spin_unlock_bh(&btf_idr_lock);
+	idr_preload_end();
+
+	if (WARN_ON_ONCE(!id))
+		return -ENOSPC;
+
+	return id > 0 ? 0 : id;
+}
+
+static void btf_free_id(struct btf *btf)
+{
+	unsigned long flags;
+
+	/*
+	 * In map-in-map, calling map_delete_elem() on outer
+	 * map will call bpf_map_put on the inner map.
+	 * It will then eventually call btf_free_id()
+	 * on the inner map.  Some of the map_delete_elem()
+	 * implementation may have irq disabled, so
+	 * we need to use the _irqsave() version instead
+	 * of the _bh() version.
+	 */
+	spin_lock_irqsave(&btf_idr_lock, flags);
+	idr_remove(&btf_idr, btf->id);
+	spin_unlock_irqrestore(&btf_idr_lock, flags);
+}
+
 static void btf_free(struct btf *btf)
 {
 	kvfree(btf->types);
@@ -607,15 +649,19 @@ static void btf_free(struct btf *btf)
 	kfree(btf);
 }
 
-static void btf_get(struct btf *btf)
+static void btf_free_rcu(struct rcu_head *rcu)
 {
-	refcount_inc(&btf->refcnt);
+	struct btf *btf = container_of(rcu, struct btf, rcu);
+
+	btf_free(btf);
 }
 
 void btf_put(struct btf *btf)
 {
-	if (btf && refcount_dec_and_test(&btf->refcnt))
-		btf_free(btf);
+	if (btf && refcount_dec_and_test(&btf->refcnt)) {
+		btf_free_id(btf);
+		call_rcu(&btf->rcu, btf_free_rcu);
+	}
 }
 
 static int env_resolve_init(struct btf_verifier_env *env)
@@ -2006,10 +2052,15 @@ const struct file_operations btf_fops = {
 	.release	= btf_release,
 };
 
+static int __btf_new_fd(struct btf *btf)
+{
+	return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
+}
+
 int btf_new_fd(const union bpf_attr *attr)
 {
 	struct btf *btf;
-	int fd;
+	int ret;
 
 	btf = btf_parse(u64_to_user_ptr(attr->btf),
 			attr->btf_size, attr->btf_log_level,
@@ -2018,12 +2069,23 @@ int btf_new_fd(const union bpf_attr *attr)
 	if (IS_ERR(btf))
 		return PTR_ERR(btf);
 
-	fd = anon_inode_getfd("btf", &btf_fops, btf,
-			      O_RDONLY | O_CLOEXEC);
-	if (fd < 0)
+	ret = btf_alloc_id(btf);
+	if (ret) {
+		btf_free(btf);
+		return ret;
+	}
+
+	/*
+	 * The BTF ID is published to the userspace.
+	 * All BTF free must go through call_rcu() from
+	 * now on (i.e. free by calling btf_put()).
+	 */
+
+	ret = __btf_new_fd(btf);
+	if (ret < 0)
 		btf_put(btf);
 
-	return fd;
+	return ret;
 }
 
 struct btf *btf_get_by_fd(int fd)
@@ -2042,7 +2104,7 @@ struct btf *btf_get_by_fd(int fd)
 	}
 
 	btf = f.file->private_data;
-	btf_get(btf);
+	refcount_inc(&btf->refcnt);
 	fdput(f);
 
 	return btf;
@@ -2062,3 +2124,29 @@ int btf_get_info_by_fd(const struct btf *btf,
 
 	return 0;
 }
+
+int btf_get_fd_by_id(u32 id)
+{
+	struct btf *btf;
+	int fd;
+
+	rcu_read_lock();
+	btf = idr_find(&btf_idr, id);
+	if (!btf || !refcount_inc_not_zero(&btf->refcnt))
+		btf = ERR_PTR(-ENOENT);
+	rcu_read_unlock();
+
+	if (IS_ERR(btf))
+		return PTR_ERR(btf);
+
+	fd = __btf_new_fd(btf);
+	if (fd < 0)
+		btf_put(btf);
+
+	return fd;
+}
+
+u32 btf_id(const struct btf *btf)
+{
+	return btf->id;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 263e13ede029..8b0a45d65454 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -252,7 +252,6 @@ static void bpf_map_free_deferred(struct work_struct *work)
 
 	bpf_map_uncharge_memlock(map);
 	security_bpf_map_free(map);
-	btf_put(map->btf);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
 }
@@ -273,6 +272,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
 	if (atomic_dec_and_test(&map->refcnt)) {
 		/* bpf_map_free_id() must be called first */
 		bpf_map_free_id(map, do_idr_lock);
+		btf_put(map->btf);
 		INIT_WORK(&map->work, bpf_map_free_deferred);
 		schedule_work(&map->work);
 	}
@@ -2000,6 +2000,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
 	info.map_flags = map->map_flags;
 	memcpy(info.name, map->name, sizeof(map->name));
 
+	if (map->btf) {
+		info.btf_id = btf_id(map->btf);
+		info.btf_key_id = map->btf_key_id;
+		info.btf_value_id = map->btf_value_id;
+	}
+
 	if (bpf_map_is_dev_bound(map)) {
 		err = bpf_map_offload_info_fill(&info, map);
 		if (err)
@@ -2057,6 +2063,19 @@ static int bpf_btf_load(const union bpf_attr *attr)
 	return btf_new_fd(attr);
 }
 
+#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
+
+static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
+{
+	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return btf_get_fd_by_id(attr->btf_id);
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -2140,6 +2159,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_BTF_LOAD:
 		err = bpf_btf_load(&attr);
 		break;
+	case BPF_BTF_GET_FD_BY_ID:
+		err = bpf_btf_get_fd_by_id(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH bpf-next 00/10] bpf: support offload of bpf_event_output()
From: Daniel Borkmann @ 2018-05-04 21:46 UTC (permalink / raw)
  To: Jakub Kicinski, alexei.starovoitov; +Cc: oss-drivers, netdev
In-Reply-To: <20180504013717.29317-1-jakub.kicinski@netronome.com>

On 05/04/2018 03:37 AM, Jakub Kicinski wrote:
> Hi!
> 
> This series centres on NFP offload of bpf_event_output().  The
> first patch allows perf event arrays to be used by offloaded
> programs.  Next patch makes the nfp driver keep track of such
> arrays to be able to filter FW events referring to maps.
> Perf event arrays are not device bound.  Having driver
> reimplement and manage the perf array seems brittle and unnecessary.
> 
> Patch 4 moves slightly the verifier step which replaces map fds
> with map pointers.  This is useful for nfp JIT since we can then
> easily replace host pointers with NFP table ids (patch 6).  This
> allows us to lift the limitation on map helpers having to be used
> with the same map pointer on all paths.  Second use of replacing
> fds with real host map pointers is that we can use the host map
> pointer as a key for FW events in perf event array offload.
> 
> Patch 5 adds perf event output offload support for the NFP.
> 
> There are some differences between bpf_event_output() offloaded
> and non-offloaded version.  The FW messages which carry events
> may get dropped and reordered relatively easily.  The return codes
> from the helper are also not guaranteed to match the host.  Users
> are warned about some of those discrepancies with a one time
> warning message to kernel logs.
> 
> bpftool gains an ability to dump perf ring events in a very simple
> format.  This was very useful for testing and simple debug, maybe
> it will be useful to others?
> 
> Last patch is a trivial comment fix.

Nice approach, applied to bpf-next, thanks Jakub!

^ permalink raw reply

* Re: [PATCH] cxgb4vf: fix t4vf_eth_xmit()'s return type
From: Casey Leedom @ 2018-05-04 21:41 UTC (permalink / raw)
  To: Luc Van Oostenryck, linux-kernel@vger.kernel.org; +Cc: netdev@vger.kernel.org
In-Reply-To: <20180424131902.5767-1-luc.vanoostenryck@gmail.com>

| From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
| Sent: Tuesday, April 24, 2018 6:19:02 AM
| 
| The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
| which is a typedef for an enum type, but the implementation in this
| driver returns an 'int'.
| 
| Fix this by returning 'netdev_tx_t' in this driver too.

Looks good to me.

Casey

^ permalink raw reply

* Re: [PATCH net-next] net: core: rework skb_probe_transport_header()
From: kbuild test robot @ 2018-05-04 21:35 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: kbuild-all, netdev, David S. Miller, Eric Dumazet, Jason Wang
In-Reply-To: <7cbdf466f4a1bf44ddbb948428dc7bb0dad091a7.1525340013.git.pabeni@redhat.com>

Hi Paolo,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Paolo-Abeni/net-core-rework-skb_probe_transport_header/20180504-041345
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> include/linux/skbuff.h:2360:32: sparse: Using plain integer as NULL pointer
   drivers/net/tun.c:2088:40: sparse: expression using sizeof(void)
   drivers/net/tun.c:2221:15: sparse: expression using sizeof(void)
   drivers/net/tun.c:2221:15: sparse: expression using sizeof(void)
   drivers/net/tun.c:2846:36: sparse: incorrect type in argument 2 (different address spaces) @@    expected struct tun_prog [noderef] <asn:4>**prog_p @@    got noderef] <asn:4>**prog_p @@
   drivers/net/tun.c:2846:36:    expected struct tun_prog [noderef] <asn:4>**prog_p
   drivers/net/tun.c:2846:36:    got struct tun_prog **prog_p
   drivers/net/tun.c:3142:42: sparse: incorrect type in argument 2 (different address spaces) @@    expected struct tun_prog **prog_p @@    got struct tun_prog [struct tun_prog **prog_p @@
   drivers/net/tun.c:3142:42:    expected struct tun_prog **prog_p
   drivers/net/tun.c:3142:42:    got struct tun_prog [noderef] <asn:4>**<noident>
   drivers/net/tun.c:3146:42: sparse: incorrect type in argument 2 (different address spaces) @@    expected struct tun_prog **prog_p @@    got struct tun_prog [struct tun_prog **prog_p @@
   drivers/net/tun.c:3146:42:    expected struct tun_prog **prog_p
   drivers/net/tun.c:3146:42:    got struct tun_prog [noderef] <asn:4>**<noident>
--
>> include/linux/skbuff.h:2360:32: sparse: Using plain integer as NULL pointer
   drivers/net/tap.c:879:15: sparse: expression using sizeof(void)
   drivers/net/tap.c:879:15: sparse: expression using sizeof(void)
--
   drivers/net/xen-netback/netback.c:175:21: sparse: expression using sizeof(void)
   drivers/net/xen-netback/netback.c:182:35: sparse: expression using sizeof(void)
   drivers/net/xen-netback/netback.c:182:35: sparse: expression using sizeof(void)
>> include/linux/skbuff.h:2360:32: sparse: Using plain integer as NULL pointer
   drivers/net/xen-netback/netback.c:1632:37: sparse: expression using sizeof(void)

vim +2360 include/linux/skbuff.h

  2349	
  2350	static inline void skb_probe_transport_header(struct sk_buff *skb,
  2351						      const int offset_hint)
  2352	{
  2353		struct flow_keys_basic keys;
  2354	
  2355		if (skb_transport_header_was_set(skb))
  2356			return;
  2357	
  2358		memset(&keys, 0, sizeof(keys));
  2359		if (__skb_flow_dissect(skb, &flow_keys_buf_dissector, &keys,
> 2360				       0, 0, 0, 0, 0))
  2361			skb_set_transport_header(skb, keys.control.thoff);
  2362		else
  2363			skb_set_transport_header(skb, offset_hint);
  2364	}
  2365	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* Re: [PATCH bpf-next 09/10] tools: bpftool: add simple perf event output reader
From: Alexei Starovoitov @ 2018-05-04 21:25 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: daniel, oss-drivers, netdev
In-Reply-To: <20180504013717.29317-10-jakub.kicinski@netronome.com>

On Thu, May 03, 2018 at 06:37:16PM -0700, Jakub Kicinski wrote:
> Users of BPF sooner or later discover perf_event_output() helpers
> and BPF_MAP_TYPE_PERF_EVENT_ARRAY.  Dumping this array type is
> not possible, however, we can add simple reading of perf events.
> Create a new event_pipe subcommand for maps, this sub command
> will only work with BPF_MAP_TYPE_PERF_EVENT_ARRAY maps.
> 
> Parts of the code from samples/bpf/trace_output_user.c.
> 
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
> ---
>  .../bpf/bpftool/Documentation/bpftool-map.rst |  29 +-
>  tools/bpf/bpftool/Documentation/bpftool.rst   |   2 +-
>  tools/bpf/bpftool/Makefile                    |   7 +-
>  tools/bpf/bpftool/bash-completion/bpftool     |  36 +-
>  tools/bpf/bpftool/common.c                    |  19 +
>  tools/bpf/bpftool/main.h                      |   4 +
>  tools/bpf/bpftool/map.c                       |  19 +-
>  tools/bpf/bpftool/map_perf_ring.c             | 347 ++++++++++++++++++
>  8 files changed, 444 insertions(+), 19 deletions(-)
>  create mode 100644 tools/bpf/bpftool/map_perf_ring.c
> 
> diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
> index c3eef8c972cd..a6258bc8ec4f 100644
> --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
> +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
> @@ -22,12 +22,13 @@ MAP COMMANDS
>  =============
>  
>  |	**bpftool** **map { show | list }**   [*MAP*]
> -|	**bpftool** **map dump**    *MAP*
> -|	**bpftool** **map update**  *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
> -|	**bpftool** **map lookup**  *MAP*  **key** *DATA*
> -|	**bpftool** **map getnext** *MAP* [**key** *DATA*]
> -|	**bpftool** **map delete**  *MAP*  **key** *DATA*
> -|	**bpftool** **map pin**     *MAP*  *FILE*
> +|	**bpftool** **map dump**       *MAP*
> +|	**bpftool** **map update**     *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
> +|	**bpftool** **map lookup**     *MAP*  **key** *DATA*
> +|	**bpftool** **map getnext**    *MAP* [**key** *DATA*]
> +|	**bpftool** **map delete**     *MAP*  **key** *DATA*
> +|	**bpftool** **map pin**        *MAP*  *FILE*
> +|	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
>  |	**bpftool** **map help**
>  |
>  |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
> @@ -76,6 +77,22 @@ DESCRIPTION
>  
>  		  Note: *FILE* must be located in *bpffs* mount.
>  
> +	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
> +		  Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
> +
> +		  Install perf rings into a perf event array map and dump
> +		  output of any bpf_perf_event_output() call in the kernel.
> +		  By default read the number of CPUs on the system and
> +		  install perf ring for each CPU in the corresponding index
> +		  in the array.
> +
> +		  If **cpu** and **index** are specified, install perf ring
> +		  for given **cpu** at **index** in the array (single ring).
> +
> +		  Note that installing a perf ring into an array will silently
> +		  replace any existing ring.  Any other application will stop
> +		  receiving events if it installed its rings earlier.
> +
>  	**bpftool map help**
>  		  Print short help message.
>  
> diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
> index 20689a321ffe..564cb0d9692b 100644
> --- a/tools/bpf/bpftool/Documentation/bpftool.rst
> +++ b/tools/bpf/bpftool/Documentation/bpftool.rst
> @@ -23,7 +23,7 @@ SYNOPSIS
>  
>  	*MAP-COMMANDS* :=
>  	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
> -	| **pin** | **help** }
> +	| **pin** | **event_pipe** | **help** }
>  
>  	*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
>  	| **load** | **help** }
> diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
> index 4e69782c4a79..892dbf095bff 100644
> --- a/tools/bpf/bpftool/Makefile
> +++ b/tools/bpf/bpftool/Makefile
> @@ -39,7 +39,12 @@ CC = gcc
>  
>  CFLAGS += -O2
>  CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
> -CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
> +CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
> +	-I$(srctree)/kernel/bpf/ \
> +	-I$(srctree)/tools/include \
> +	-I$(srctree)/tools/include/uapi \
> +	-I$(srctree)/tools/lib/bpf \
> +	-I$(srctree)/tools/perf
>  CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
>  LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
>  
> diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
> index 852d84a98acd..b301c9b315f1 100644
> --- a/tools/bpf/bpftool/bash-completion/bpftool
> +++ b/tools/bpf/bpftool/bash-completion/bpftool
> @@ -1,6 +1,6 @@
>  # bpftool(8) bash completion                               -*- shell-script -*-
>  #
> -# Copyright (C) 2017 Netronome Systems, Inc.
> +# Copyright (C) 2017-2018 Netronome Systems, Inc.
>  #
>  # This software is dual licensed under the GNU General License
>  # Version 2, June 1991 as shown in the file COPYING in the top-level
> @@ -79,6 +79,14 @@ _bpftool_get_map_ids()
>          command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
>  }
>  
> +_bpftool_get_perf_map_ids()
> +{
> +    COMPREPLY+=( $( compgen -W "$( bpftool -jp map  2>&1 | \
> +        command grep -C2 perf_event_array | \
> +        command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
> +}
> +
> +
>  _bpftool_get_prog_ids()
>  {
>      COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
> @@ -359,10 +367,34 @@ _bpftool()
>                      fi
>                      return 0
>                      ;;
> +                event_pipe)
> +                    case $prev in
> +                        $command)
> +                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
> +                            return 0
> +                            ;;
> +                        id)
> +                            _bpftool_get_perf_map_ids
> +                            return 0
> +                            ;;
> +                        cpu)
> +                            return 0
> +                            ;;
> +                        index)
> +                            return 0
> +                            ;;
> +                        *)
> +                            _bpftool_once_attr 'cpu'
> +                            _bpftool_once_attr 'index'
> +                            return 0
> +                            ;;
> +                    esac
> +                    ;;
>                  *)
>                      [[ $prev == $object ]] && \
>                          COMPREPLY=( $( compgen -W 'delete dump getnext help \
> -                            lookup pin show list update' -- "$cur" ) )
> +                            lookup pin event_pipe show list update' -- \
> +                            "$cur" ) )
>                      ;;
>              esac
>              ;;
> diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
> index 9c620770c6ed..32f9e397a6c0 100644
> --- a/tools/bpf/bpftool/common.c
> +++ b/tools/bpf/bpftool/common.c
> @@ -331,6 +331,16 @@ char *get_fdinfo(int fd, const char *key)
>  	return NULL;
>  }
>  
> +void print_data_json(uint8_t *data, size_t len)
> +{
> +	unsigned int i;
> +
> +	jsonw_start_array(json_wtr);
> +	for (i = 0; i < len; i++)
> +		jsonw_printf(json_wtr, "%d", data[i]);
> +	jsonw_end_array(json_wtr);
> +}
> +
>  void print_hex_data_json(uint8_t *data, size_t len)
>  {
>  	unsigned int i;
> @@ -421,6 +431,15 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
>  	}
>  }
>  
> +unsigned int get_page_size(void)
> +{
> +	static int result;
> +
> +	if (!result)
> +		result = getpagesize();
> +	return result;
> +}
> +
>  unsigned int get_possible_cpus(void)
>  {
>  	static unsigned int result;
> diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
> index cbf8985da362..6173cd997e7a 100644
> --- a/tools/bpf/bpftool/main.h
> +++ b/tools/bpf/bpftool/main.h
> @@ -117,14 +117,18 @@ int do_pin_fd(int fd, const char *name);
>  
>  int do_prog(int argc, char **arg);
>  int do_map(int argc, char **arg);
> +int do_event_pipe(int argc, char **argv);
>  int do_cgroup(int argc, char **arg);
>  
>  int prog_parse_fd(int *argc, char ***argv);
> +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
>  
>  void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
>  		       const char *arch);
> +void print_data_json(uint8_t *data, size_t len);
>  void print_hex_data_json(uint8_t *data, size_t len);
>  
> +unsigned int get_page_size(void);
>  unsigned int get_possible_cpus(void);
>  const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
>  
> diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
> index 5efefde5f578..af6766e956ba 100644
> --- a/tools/bpf/bpftool/map.c
> +++ b/tools/bpf/bpftool/map.c
> @@ -130,8 +130,7 @@ static int map_parse_fd(int *argc, char ***argv)
>  	return -1;
>  }
>  
> -static int
> -map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
> +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
>  {
>  	int err;
>  	int fd;
> @@ -817,12 +816,13 @@ static int do_help(int argc, char **argv)
>  
>  	fprintf(stderr,
>  		"Usage: %s %s { show | list }   [MAP]\n"
> -		"       %s %s dump    MAP\n"
> -		"       %s %s update  MAP  key DATA value VALUE [UPDATE_FLAGS]\n"
> -		"       %s %s lookup  MAP  key DATA\n"
> -		"       %s %s getnext MAP [key DATA]\n"
> -		"       %s %s delete  MAP  key DATA\n"
> -		"       %s %s pin     MAP  FILE\n"
> +		"       %s %s dump       MAP\n"
> +		"       %s %s update     MAP  key DATA value VALUE [UPDATE_FLAGS]\n"
> +		"       %s %s lookup     MAP  key DATA\n"
> +		"       %s %s getnext    MAP [key DATA]\n"
> +		"       %s %s delete     MAP  key DATA\n"
> +		"       %s %s pin        MAP  FILE\n"
> +		"       %s %s event_pipe MAP [cpu N index M]\n"
>  		"       %s %s help\n"
>  		"\n"
>  		"       MAP := { id MAP_ID | pinned FILE }\n"
> @@ -834,7 +834,7 @@ static int do_help(int argc, char **argv)
>  		"",
>  		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
>  		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
> -		bin_name, argv[-2], bin_name, argv[-2]);
> +		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
>  
>  	return 0;
>  }
> @@ -849,6 +849,7 @@ static const struct cmd cmds[] = {
>  	{ "getnext",	do_getnext },
>  	{ "delete",	do_delete },
>  	{ "pin",	do_pin },
> +	{ "event_pipe",	do_event_pipe },
>  	{ 0 }
>  };
>  
> diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
> new file mode 100644
> index 000000000000..c5a2ced8552d
> --- /dev/null
> +++ b/tools/bpf/bpftool/map_perf_ring.c
> @@ -0,0 +1,347 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (C) 2018 Netronome Systems, Inc. */
> +/* This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <libbpf.h>
> +#include <poll.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <time.h>
> +#include <unistd.h>
> +#include <linux/bpf.h>
> +#include <linux/perf_event.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <sys/syscall.h>
> +
> +#include <bpf.h>
> +#include <perf-sys.h>
> +
> +#include "main.h"
> +
> +#define MMAP_PAGE_CNT	16
> +
> +static bool stop;
> +
> +struct event_ring_info {
> +	int fd;
> +	int key;
> +	unsigned int cpu;
> +	void *mem;
> +};
> +
> +struct perf_event_sample {
> +	struct perf_event_header header;
> +	__u32 size;
> +	unsigned char data[];
> +};
> +
> +static void int_exit(int signo)
> +{
> +	fprintf(stderr, "Stopping...\n");
> +	stop = true;
> +}
> +
> +static void
> +print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
> +{
> +	struct {
> +		struct perf_event_header header;
> +		__u64 id;
> +		__u64 lost;
> +	} *lost = (void *)e;
> +	struct timespec ts;
> +
> +	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
> +		perror("Can't read clock for timestamp");
> +		return;
> +	}
> +
> +	if (json_output) {
> +		jsonw_start_object(json_wtr);
> +		jsonw_name(json_wtr, "timestamp");
> +		jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec);
> +		jsonw_name(json_wtr, "type");
> +		jsonw_uint(json_wtr, e->header.type);
> +		jsonw_name(json_wtr, "cpu");
> +		jsonw_uint(json_wtr, ring->cpu);
> +		jsonw_name(json_wtr, "index");
> +		jsonw_uint(json_wtr, ring->key);
> +		if (e->header.type == PERF_RECORD_SAMPLE) {
> +			jsonw_name(json_wtr, "data");
> +			print_data_json(e->data, e->size);
> +		} else if (e->header.type == PERF_RECORD_LOST) {
> +			jsonw_name(json_wtr, "lost");
> +			jsonw_start_object(json_wtr);
> +			jsonw_name(json_wtr, "id");
> +			jsonw_uint(json_wtr, lost->id);
> +			jsonw_name(json_wtr, "count");
> +			jsonw_uint(json_wtr, lost->lost);
> +			jsonw_end_object(json_wtr);
> +		}
> +		jsonw_end_object(json_wtr);
> +	} else {
> +		if (e->header.type == PERF_RECORD_SAMPLE) {
> +			printf("== @%ld.%ld CPU: %d index: %d =====\n",
> +			       (long)ts.tv_sec, ts.tv_nsec,
> +			       ring->cpu, ring->key);
> +			fprint_hex(stdout, e->data, e->size, " ");
> +			printf("\n");
> +		} else if (e->header.type == PERF_RECORD_LOST) {
> +			printf("lost %lld events\n", lost->lost);
> +		} else {
> +			printf("unknown event type=%d size=%d\n",
> +			       e->header.type, e->header.size);
> +		}
> +	}
> +}
> +
> +static void
> +perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
> +{
> +	volatile struct perf_event_mmap_page *header = ring->mem;
> +	__u64 buffer_size = MMAP_PAGE_CNT * get_page_size();
> +	__u64 data_tail = header->data_tail;
> +	__u64 data_head = header->data_head;
> +	void *base, *begin, *end;
> +
> +	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
> +	if (data_head == data_tail)
> +		return;

this function was copied several times into different places.
I think it's time to put into common lib. Like libbpf.
Would be great if you can do it in the follow up.

for the set:
Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* [PATCH] net: disable UDP punt on sockets in RCV_SHUTDWON
From: Chintan Shah @ 2018-05-04 21:08 UTC (permalink / raw)
  To: davem, kuznet, jmorris, yoshfuji, kaber, netdev, linux-kernel
  Cc: chintsha, kamensky, takondra, xe-linux-external, enkechen

A UDP application which opens multiple sockets with same local
address/port combination (using SO_REUSEPORT/SO_REUSEADDR socket options);
and issues connect to a remote socket (using one of these local socket).
Now if the same socket, which issued connect, issues shutdown (SHUT_RD);
packets would still be queued to this socket (if sent from same remote
client, which the local socket connected to), and not delivered to the
other socket in the normal state.

In UDP socket lookup, socket's state (if it has issued SHUTDOWN on
read or not), is not taken into account. When application calls, SHUTDOWN
(SHUT_RD), UDP socket's state is changed (sk_shutdown is set to
RCV_SHUTDOWN).

UDP socket lookup is performed with help of compute_score
function. The function checks socket's attributes against incoming packets
headers; and based on match/mismatch it returns score. We can check for
the socket's state (sk->sk_shutdown) here, in same compute_score function,
and return values accordingly.

Signed-off-by: Chintan Shah <chintsha@cisco.com>
CC: xe-linux-external@cisco.com
---
 net/ipv4/udp.c | 6 ++++++
 net/ipv6/udp.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0dfcd73..a5fe6d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -402,6 +402,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 #endif
 #endif

+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return -1;
+
 	if (!net_eq(sock_net(sk), net) ||
 	    udp_sk(sk)->udp_port_hash != hnum ||
 	    ipv6_only_sock(sk))
@@ -483,6 +486,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 #endif
 #endif

+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return -1;
+
 	if (!net_eq(sock_net(sk), net) ||
 	    ipv6_only_sock(sk))
 		return -1;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d956cbb..2254b07 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -170,6 +170,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 #endif
 #endif

+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return -1;
+
 	if (!net_eq(sock_net(sk), net) ||
 	    udp_sk(sk)->udp_port_hash != hnum ||
 	    sk->sk_family != PF_INET6)
@@ -251,6 +254,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 #endif
 #endif

+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return -1;
+
 	if (!net_eq(sock_net(sk), net) ||
 	    udp_sk(sk)->udp_port_hash != hnum ||
 	    sk->sk_family != PF_INET6)
-- 
2.5.0

^ permalink raw reply related

* [PATCH ghak81 RFC V1 5/5] audit: collect audit task parameters
From: Richard Guy Briggs @ 2018-05-04 20:54 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list
  Cc: Eric Paris, Paul Moore, Steve Grubb, Ingo Molnar, David Howells,
	Richard Guy Briggs
In-Reply-To: <cover.1525466167.git.rgb@redhat.com>

The audit-related parameters in struct task_struct should ideally be
collected together and accessed through a standard audit API.

Collect the existing loginuid, sessionid and audit_context together in a
new struct audit_task_info pointer called "audit" in struct task_struct.

Use kmem_cache to manage this pool of memory.
Un-inline audit_free() to be able to always recover that memory.

See: https://github.com/linux-audit/audit-kernel/issues/81

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 MAINTAINERS                |  2 +-
 include/linux/audit.h      |  8 ++++----
 include/linux/audit_task.h | 31 +++++++++++++++++++++++++++++++
 include/linux/sched.h      |  6 ++----
 init/init_task.c           |  8 ++++++--
 kernel/auditsc.c           |  4 ++--
 6 files changed, 46 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/audit_task.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 0a1410d..8c7992d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2510,7 +2510,7 @@ L:	linux-audit@redhat.com (moderated for non-subscribers)
 W:	https://github.com/linux-audit
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
 S:	Supported
-F:	include/linux/audit.h
+F:	include/linux/audit*.h
 F:	include/uapi/linux/audit.h
 F:	kernel/audit*
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index dba0d45..1324969 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -237,11 +237,11 @@ extern void __audit_inode_child(struct inode *parent,
 
 static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
 {
-	task->audit_context = ctx;
+	task->audit.ctx = ctx;
 }
 static inline struct audit_context *audit_context(struct task_struct *task)
 {
-	return task->audit_context;
+	return task->audit.ctx;
 }
 static inline bool audit_dummy_context(void)
 {
@@ -330,12 +330,12 @@ extern int auditsc_get_stamp(struct audit_context *ctx,
 
 static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
 {
-	return tsk->loginuid;
+	return tsk->audit.loginuid;
 }
 
 static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 {
-	return tsk->sessionid;
+	return tsk->audit.sessionid;
 }
 
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
diff --git a/include/linux/audit_task.h b/include/linux/audit_task.h
new file mode 100644
index 0000000..d4b3a20
--- /dev/null
+++ b/include/linux/audit_task.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* audit_task.h -- definition of audit_task_info structure
+ *
+ * Copyright 2018 Red Hat Inc., Raleigh, North Carolina.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Written by Richard Guy Briggs <rgb@redhat.com>
+ *
+ */
+
+#ifndef _LINUX_AUDIT_TASK_H_
+#define _LINUX_AUDIT_TASK_H_
+
+struct audit_context;
+struct audit_task_info {
+	kuid_t			loginuid;
+	unsigned int		sessionid;
+	struct audit_context	*ctx;
+};
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f..b58eca0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -27,9 +27,9 @@
 #include <linux/signal_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
+#include <linux/audit_task.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
-struct audit_context;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
@@ -832,10 +832,8 @@ struct task_struct {
 
 	struct callback_head		*task_works;
 
-	struct audit_context		*audit_context;
 #ifdef CONFIG_AUDITSYSCALL
-	kuid_t				loginuid;
-	unsigned int			sessionid;
+	struct audit_task_info		audit;
 #endif
 	struct seccomp			seccomp;
 
diff --git a/init/init_task.c b/init/init_task.c
index c788f91..d33260d 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/audit.h>
 
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
@@ -118,8 +119,11 @@ struct task_struct init_task
 	.thread_group	= LIST_HEAD_INIT(init_task.thread_group),
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDITSYSCALL
-	.loginuid	= INVALID_UID,
-	.sessionid	= AUDIT_SID_UNSET,
+	.audit		= {
+		.loginuid	= INVALID_UID,
+		.sessionid	= AUDIT_SID_UNSET,
+		.ctx		= NULL,
+	},
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f294e4a..b5d8bff 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2068,8 +2068,8 @@ int audit_set_loginuid(kuid_t loginuid)
 			sessionid = (unsigned int)atomic_inc_return(&session_id);
 	}
 
-	task->sessionid = sessionid;
-	task->loginuid = loginuid;
+	task->audit.sessionid = sessionid;
+	task->audit.loginuid = loginuid;
 out:
 	audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
 	return rc;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH ghak81 RFC V1 4/5] audit: use inline function to set audit context
From: Richard Guy Briggs @ 2018-05-04 20:54 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list
  Cc: Richard Guy Briggs, David Howells, Ingo Molnar
In-Reply-To: <cover.1525466167.git.rgb@redhat.com>

Recognizing that the audit context is an internal audit value, use an
access function to set the audit context pointer for the task
rather than reaching directly into the task struct to set it.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 include/linux/audit.h | 8 ++++++++
 kernel/auditsc.c      | 6 +++---
 kernel/fork.c         | 2 +-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 93e4c61..dba0d45 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -235,6 +235,10 @@ extern void __audit_inode_child(struct inode *parent,
 extern void __audit_seccomp(unsigned long syscall, long signr, int code);
 extern void __audit_ptrace(struct task_struct *t);
 
+static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
+{
+	task->audit_context = ctx;
+}
 static inline struct audit_context *audit_context(struct task_struct *task)
 {
 	return task->audit_context;
@@ -472,6 +476,10 @@ static inline bool audit_dummy_context(void)
 {
 	return true;
 }
+static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
+{
+	task->audit_context = ctx;
+}
 static inline struct audit_context *audit_context(struct task_struct *task)
 {
 	return NULL;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a4bbdcc..f294e4a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -865,7 +865,7 @@ static inline struct audit_context *audit_take_context(struct task_struct *tsk,
 		audit_filter_inodes(tsk, context);
 	}
 
-	tsk->audit_context = NULL;
+	audit_set_context(tsk, NULL);
 	return context;
 }
 
@@ -952,7 +952,7 @@ int audit_alloc(struct task_struct *tsk)
 	}
 	context->filterkey = key;
 
-	tsk->audit_context  = context;
+	audit_set_context(tsk, context);
 	set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
 	return 0;
 }
@@ -1590,7 +1590,7 @@ void __audit_syscall_exit(int success, long return_code)
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 	}
-	tsk->audit_context = context;
+	audit_set_context(tsk, context);
 }
 
 static inline void handle_one(const struct inode *inode)
diff --git a/kernel/fork.c b/kernel/fork.c
index 242c8c9..cd18448 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1713,7 +1713,7 @@ static __latent_entropy struct task_struct *copy_process(
 	p->start_time = ktime_get_ns();
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
-	p->audit_context = NULL;
+	audit_set_context(p, NULL);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH ghak81 RFC V1 3/5] audit: use inline function to get audit context
From: Richard Guy Briggs @ 2018-05-04 20:54 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list
  Cc: Eric Paris, Paul Moore, Steve Grubb, Ingo Molnar, David Howells,
	Richard Guy Briggs
In-Reply-To: <cover.1525466167.git.rgb@redhat.com>

Recognizing that the audit context is an internal audit value, use an
access function to retrieve the audit context pointer for the task
rather than reaching directly into the task struct to get it.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 include/linux/audit.h                | 16 ++++++++---
 include/net/xfrm.h                   |  2 +-
 kernel/audit.c                       |  4 +--
 kernel/audit_watch.c                 |  2 +-
 kernel/auditsc.c                     | 52 ++++++++++++++++++------------------
 net/bridge/netfilter/ebtables.c      |  2 +-
 net/core/dev.c                       |  2 +-
 net/netfilter/x_tables.c             |  2 +-
 net/netlabel/netlabel_user.c         |  2 +-
 security/integrity/ima/ima_api.c     |  2 +-
 security/integrity/integrity_audit.c |  2 +-
 security/lsm_audit.c                 |  2 +-
 security/selinux/hooks.c             |  4 +--
 security/selinux/selinuxfs.c         |  6 ++---
 security/selinux/ss/services.c       | 12 ++++-----
 15 files changed, 60 insertions(+), 52 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 5f86f7c..93e4c61 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -235,26 +235,30 @@ extern void __audit_inode_child(struct inode *parent,
 extern void __audit_seccomp(unsigned long syscall, long signr, int code);
 extern void __audit_ptrace(struct task_struct *t);
 
+static inline struct audit_context *audit_context(struct task_struct *task)
+{
+	return task->audit_context;
+}
 static inline bool audit_dummy_context(void)
 {
-	void *p = current->audit_context;
+	void *p = audit_context(current);
 	return !p || *(int *)p;
 }
 static inline void audit_free(struct task_struct *task)
 {
-	if (unlikely(task->audit_context))
+	if (unlikely(audit_context(task)))
 		__audit_free(task);
 }
 static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
 {
-	if (unlikely(current->audit_context))
+	if (unlikely(audit_context(current)))
 		__audit_syscall_entry(major, a0, a1, a2, a3);
 }
 static inline void audit_syscall_exit(void *pt_regs)
 {
-	if (unlikely(current->audit_context)) {
+	if (unlikely(audit_context(current))) {
 		int success = is_syscall_success(pt_regs);
 		long return_code = regs_return_value(pt_regs);
 
@@ -468,6 +472,10 @@ static inline bool audit_dummy_context(void)
 {
 	return true;
 }
+static inline struct audit_context *audit_context(struct task_struct *task)
+{
+	return NULL;
+}
 static inline struct filename *audit_reusename(const __user char *name)
 {
 	return NULL;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fcce8ee..2788332 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -736,7 +736,7 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op)
 
 	if (audit_enabled == 0)
 		return NULL;
-	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC,
+	audit_buf = audit_log_start(audit_context(current), GFP_ATOMIC,
 				    AUDIT_MAC_IPSEC_EVENT);
 	if (audit_buf == NULL)
 		return NULL;
diff --git a/kernel/audit.c b/kernel/audit.c
index e9f9a90..9a03603 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1099,7 +1099,7 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
 
 	if (audit_enabled == AUDIT_OFF)
 		return;
-	ab = audit_log_start(current->audit_context,
+	ab = audit_log_start(audit_context(current),
 			     GFP_KERNEL, AUDIT_FEATURE_CHANGE);
 	if (!ab)
 		return;
@@ -2317,7 +2317,7 @@ void audit_log_link_denied(const char *operation)
 		return;
 
 	/* Generate AUDIT_ANOM_LINK with subject, operation, outcome. */
-	ab = audit_log_start(current->audit_context, GFP_KERNEL,
+	ab = audit_log_start(audit_context(current), GFP_KERNEL,
 			     AUDIT_ANOM_LINK);
 	if (!ab)
 		return;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9eb8b35..8b596c4 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -274,7 +274,7 @@ static void audit_update_watch(struct audit_parent *parent,
 		/* If the update involves invalidating rules, do the inode-based
 		 * filtering now, so we don't omit records. */
 		if (invalidating && !audit_dummy_context())
-			audit_filter_inodes(current, current->audit_context);
+			audit_filter_inodes(current, audit_context(current));
 
 		/* updating ino will likely change which audit_hash_list we
 		 * are on so we need a new watch for the new list */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 6e3ceb9..a4bbdcc 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -836,7 +836,7 @@ static inline struct audit_context *audit_take_context(struct task_struct *tsk,
 						      int return_valid,
 						      long return_code)
 {
-	struct audit_context *context = tsk->audit_context;
+	struct audit_context *context = audit_context(tsk);
 
 	if (!context)
 		return NULL;
@@ -1510,7 +1510,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
 			   unsigned long a3, unsigned long a4)
 {
 	struct task_struct *tsk = current;
-	struct audit_context *context = tsk->audit_context;
+	struct audit_context *context = audit_context(tsk);
 	enum audit_state     state;
 
 	if (!audit_enabled || !context)
@@ -1602,7 +1602,7 @@ static inline void handle_one(const struct inode *inode)
 	int count;
 	if (likely(!inode->i_fsnotify_marks))
 		return;
-	context = current->audit_context;
+	context = audit_context(current);
 	p = context->trees;
 	count = context->tree_count;
 	rcu_read_lock();
@@ -1633,7 +1633,7 @@ static void handle_path(const struct dentry *dentry)
 	unsigned long seq;
 	int count;
 
-	context = current->audit_context;
+	context = audit_context(current);
 	p = context->trees;
 	count = context->tree_count;
 retry:
@@ -1715,7 +1715,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context,
 struct filename *
 __audit_reusename(const __user char *uptr)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	struct audit_names *n;
 
 	list_for_each_entry(n, &context->names_list, list) {
@@ -1738,7 +1738,7 @@ struct filename *
  */
 void __audit_getname(struct filename *name)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	struct audit_names *n;
 
 	if (!context->in_syscall)
@@ -1766,7 +1766,7 @@ void __audit_getname(struct filename *name)
 void __audit_inode(struct filename *name, const struct dentry *dentry,
 		   unsigned int flags)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	struct inode *inode = d_backing_inode(dentry);
 	struct audit_names *n;
 	bool parent = flags & AUDIT_INODE_PARENT;
@@ -1865,7 +1865,7 @@ void __audit_inode_child(struct inode *parent,
 			 const struct dentry *dentry,
 			 const unsigned char type)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	struct inode *inode = d_backing_inode(dentry);
 	const char *dname = dentry->d_name.name;
 	struct audit_names *n, *found_parent = NULL, *found_child = NULL;
@@ -2084,7 +2084,7 @@ int audit_set_loginuid(kuid_t loginuid)
  */
 void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	if (attr)
 		memcpy(&context->mq_open.attr, attr, sizeof(struct mq_attr));
@@ -2108,7 +2108,7 @@ void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
 			const struct timespec64 *abs_timeout)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	struct timespec64 *p = &context->mq_sendrecv.abs_timeout;
 
 	if (abs_timeout)
@@ -2132,7 +2132,7 @@ void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
 
 void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	if (notification)
 		context->mq_notify.sigev_signo = notification->sigev_signo;
@@ -2151,7 +2151,7 @@ void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
  */
 void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	context->mq_getsetattr.mqdes = mqdes;
 	context->mq_getsetattr.mqstat = *mqstat;
 	context->type = AUDIT_MQ_GETSETATTR;
@@ -2164,7 +2164,7 @@ void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
  */
 void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	context->ipc.uid = ipcp->uid;
 	context->ipc.gid = ipcp->gid;
 	context->ipc.mode = ipcp->mode;
@@ -2184,7 +2184,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
  */
 void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	context->ipc.qbytes = qbytes;
 	context->ipc.perm_uid = uid;
@@ -2195,7 +2195,7 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
 
 void __audit_bprm(struct linux_binprm *bprm)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	context->type = AUDIT_EXECVE;
 	context->execve.argc = bprm->argc;
@@ -2210,7 +2210,7 @@ void __audit_bprm(struct linux_binprm *bprm)
  */
 int __audit_socketcall(int nargs, unsigned long *args)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	if (nargs <= 0 || nargs > AUDITSC_ARGS || !args)
 		return -EINVAL;
@@ -2228,7 +2228,7 @@ int __audit_socketcall(int nargs, unsigned long *args)
  */
 void __audit_fd_pair(int fd1, int fd2)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	context->fds[0] = fd1;
 	context->fds[1] = fd2;
 }
@@ -2242,7 +2242,7 @@ void __audit_fd_pair(int fd1, int fd2)
  */
 int __audit_sockaddr(int len, void *a)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	if (!context->sockaddr) {
 		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
@@ -2258,7 +2258,7 @@ int __audit_sockaddr(int len, void *a)
 
 void __audit_ptrace(struct task_struct *t)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	context->target_pid = task_tgid_nr(t);
 	context->target_auid = audit_get_loginuid(t);
@@ -2280,7 +2280,7 @@ int audit_signal_info(int sig, struct task_struct *t)
 {
 	struct audit_aux_data_pids *axp;
 	struct task_struct *tsk = current;
-	struct audit_context *ctx = tsk->audit_context;
+	struct audit_context *ctx = audit_context(tsk);
 	kuid_t uid = current_uid(), t_uid = task_uid(t);
 
 	if (auditd_test_task(t) &&
@@ -2347,7 +2347,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 			   const struct cred *new, const struct cred *old)
 {
 	struct audit_aux_data_bprm_fcaps *ax;
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	struct cpu_vfs_cap_data vcaps;
 
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
@@ -2387,7 +2387,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
  */
 void __audit_log_capset(const struct cred *new, const struct cred *old)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	context->capset.pid = task_tgid_nr(current);
 	context->capset.cap.effective   = new->cap_effective;
 	context->capset.cap.inheritable = new->cap_effective;
@@ -2398,7 +2398,7 @@ void __audit_log_capset(const struct cred *new, const struct cred *old)
 
 void __audit_mmap_fd(int fd, int flags)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 	context->mmap.fd = fd;
 	context->mmap.flags = flags;
 	context->type = AUDIT_MMAP;
@@ -2406,7 +2406,7 @@ void __audit_mmap_fd(int fd, int flags)
 
 void __audit_log_kern_module(char *name)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context(current);
 
 	context->module.name = kmalloc(strlen(name) + 1, GFP_KERNEL);
 	strcpy(context->module.name, name);
@@ -2415,7 +2415,7 @@ void __audit_log_kern_module(char *name)
 
 void __audit_fanotify(unsigned int response)
 {
-	audit_log(current->audit_context, GFP_KERNEL,
+	audit_log(audit_context(current), GFP_KERNEL,
 		AUDIT_FANOTIFY,	"resp=%u", response);
 }
 
@@ -2482,7 +2482,7 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
 
 struct list_head *audit_killed_trees(void)
 {
-	struct audit_context *ctx = current->audit_context;
+	struct audit_context *ctx = audit_context(current);
 	if (likely(!ctx || !ctx->in_syscall))
 		return NULL;
 	return &ctx->killed_trees;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 032e0fe..ff8450b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1062,7 +1062,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 
 #ifdef CONFIG_AUDIT
 	if (audit_enabled) {
-		audit_log(current->audit_context, GFP_KERNEL,
+		audit_log(audit_context(current), GFP_KERNEL,
 			  AUDIT_NETFILTER_CFG,
 			  "table=%s family=%u entries=%u",
 			  repl->name, AF_BRIDGE, repl->nentries);
diff --git a/net/core/dev.c b/net/core/dev.c
index 969462e..2837086 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6749,7 +6749,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
 			dev->flags & IFF_PROMISC ? "entered" : "left");
 		if (audit_enabled) {
 			current_uid_gid(&uid, &gid);
-			audit_log(current->audit_context, GFP_ATOMIC,
+			audit_log(audit_context(current), GFP_ATOMIC,
 				AUDIT_ANOM_PROMISCUOUS,
 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
 				dev->name, (dev->flags & IFF_PROMISC),
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 71325fe..f271630 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1414,7 +1414,7 @@ struct xt_table_info *
 
 #ifdef CONFIG_AUDIT
 	if (audit_enabled) {
-		audit_log(current->audit_context, GFP_KERNEL,
+		audit_log(audit_context(current), GFP_KERNEL,
 			  AUDIT_NETFILTER_CFG,
 			  "table=%s family=%u entries=%u",
 			  table->name, table->af, private->number);
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 58495f4..6cd5573 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -104,7 +104,7 @@ struct audit_buffer *netlbl_audit_start_common(int type,
 	if (audit_enabled == 0)
 		return NULL;
 
-	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
+	audit_buf = audit_log_start(audit_context(current), GFP_ATOMIC, type);
 	if (audit_buf == NULL)
 		return NULL;
 
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index bf88236..a727ae0 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -326,7 +326,7 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
 		hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]);
 	hash[i * 2] = '\0';
 
-	ab = audit_log_start(current->audit_context, GFP_KERNEL,
+	ab = audit_log_start(audit_context(current), GFP_KERNEL,
 			     AUDIT_INTEGRITY_RULE);
 	if (!ab)
 		goto out;
diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c
index 90987d1..79adc98a 100644
--- a/security/integrity/integrity_audit.c
+++ b/security/integrity/integrity_audit.c
@@ -38,7 +38,7 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode,
 	if (!integrity_audit_info && audit_info == 1)	/* Skip info messages */
 		return;
 
-	ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno);
+	ab = audit_log_start(audit_context(current), GFP_KERNEL, audit_msgno);
 	audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u",
 			 task_pid_nr(current),
 			 from_kuid(&init_user_ns, current_cred()->uid),
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 67703db..ccae258 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -447,7 +447,7 @@ void common_lsm_audit(struct common_audit_data *a,
 	if (a == NULL)
 		return;
 	/* we use GFP_ATOMIC so we won't sleep */
-	ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN,
+	ab = audit_log_start(audit_context(current), GFP_ATOMIC | __GFP_NOWARN,
 			     AUDIT_AVC);
 
 	if (ab == NULL)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4cafe6a..f1de97a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3294,7 +3294,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
 			} else {
 				audit_size = 0;
 			}
-			ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR);
+			ab = audit_log_start(audit_context(current), GFP_ATOMIC, AUDIT_SELINUX_ERR);
 			audit_log_format(ab, "op=setxattr invalid_context=");
 			audit_log_n_untrustedstring(ab, value, audit_size);
 			audit_log_end(ab);
@@ -6431,7 +6431,7 @@ static int selinux_setprocattr(const char *name, void *value, size_t size)
 					audit_size = size - 1;
 				else
 					audit_size = size;
-				ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR);
+				ab = audit_log_start(audit_context(current), GFP_ATOMIC, AUDIT_SELINUX_ERR);
 				audit_log_format(ab, "op=fscreate invalid_context=");
 				audit_log_n_untrustedstring(ab, value, audit_size);
 				audit_log_end(ab);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index efdc633..e5f90da 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -167,7 +167,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
 				      NULL);
 		if (length)
 			goto out;
-		audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS,
+		audit_log(audit_context(current), GFP_KERNEL, AUDIT_MAC_STATUS,
 			"enforcing=%d old_enforcing=%d auid=%u ses=%u"
 			" enabled=%d old-enabled=%d lsm=selinux res=1",
 			new_value, old_value,
@@ -303,7 +303,7 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf,
 		length = selinux_disable(fsi->state);
 		if (length)
 			goto out;
-		audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS,
+		audit_log(audit_context(current), GFP_KERNEL, AUDIT_MAC_STATUS,
 			"enforcing=%d old_enforcing=%d auid=%u ses=%u"
 			" enabled=%d old-enabled=%d lsm=selinux res=1",
 			enforcing, enforcing,
@@ -581,7 +581,7 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf,
 	length = count;
 
 out1:
-	audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
+	audit_log(audit_context(current), GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
 		"auid=%u ses=%u lsm=selinux res=1",
 		from_kuid(&init_user_ns, audit_get_loginuid(current)),
 		audit_get_sessionid(current));
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8057e19..83f40e2 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -501,7 +501,7 @@ static void security_dump_masked_av(struct policydb *policydb,
 		goto out;
 
 	/* audit a message */
-	ab = audit_log_start(current->audit_context,
+	ab = audit_log_start(audit_context(current),
 			     GFP_ATOMIC, AUDIT_SELINUX_ERR);
 	if (!ab)
 		goto out;
@@ -743,7 +743,7 @@ static int security_validtrans_handle_fail(struct selinux_state *state,
 		goto out;
 	if (context_struct_to_string(p, tcontext, &t, &tlen))
 		goto out;
-	audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+	audit_log(audit_context(current), GFP_ATOMIC, AUDIT_SELINUX_ERR,
 		  "op=security_validate_transition seresult=denied"
 		  " oldcontext=%s newcontext=%s taskcontext=%s tclass=%s",
 		  o, n, t, sym_name(p, SYM_CLASSES, tclass-1));
@@ -929,7 +929,7 @@ int security_bounded_transition(struct selinux_state *state,
 					      &old_name, &length) &&
 		    !context_struct_to_string(policydb, new_context,
 					      &new_name, &length)) {
-			audit_log(current->audit_context,
+			audit_log(audit_context(current),
 				  GFP_ATOMIC, AUDIT_SELINUX_ERR,
 				  "op=security_bounded_transition "
 				  "seresult=denied "
@@ -1586,7 +1586,7 @@ static int compute_sid_handle_invalid_context(
 		goto out;
 	if (context_struct_to_string(policydb, newcontext, &n, &nlen))
 		goto out;
-	audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+	audit_log(audit_context(current), GFP_ATOMIC, AUDIT_SELINUX_ERR,
 		  "op=security_compute_sid invalid_context=%s"
 		  " scontext=%s"
 		  " tcontext=%s"
@@ -2882,7 +2882,7 @@ int security_set_bools(struct selinux_state *state, int len, int *values)
 
 	for (i = 0; i < len; i++) {
 		if (!!values[i] != policydb->bool_val_to_struct[i]->state) {
-			audit_log(current->audit_context, GFP_ATOMIC,
+			audit_log(audit_context(current), GFP_ATOMIC,
 				AUDIT_MAC_CONFIG_CHANGE,
 				"bool=%s val=%d old_val=%d auid=%u ses=%u",
 				sym_name(policydb, SYM_BOOLS, i),
@@ -3025,7 +3025,7 @@ int security_sid_mls_copy(struct selinux_state *state,
 		if (rc) {
 			if (!context_struct_to_string(policydb, &newcon, &s,
 						      &len)) {
-				audit_log(current->audit_context,
+				audit_log(audit_context(current),
 					  GFP_ATOMIC, AUDIT_SELINUX_ERR,
 					  "op=security_sid_mls_copy "
 					  "invalid_context=%s", s);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH ghak81 RFC V1 2/5] audit: convert sessionid unset to a macro
From: Richard Guy Briggs @ 2018-05-04 20:54 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list
  Cc: Richard Guy Briggs, David Howells, Ingo Molnar
In-Reply-To: <cover.1525466167.git.rgb@redhat.com>

Use a macro, "AUDIT_SID_UNSET", to replace each instance of
initialization and comparison to an audit session ID.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 include/linux/audit.h      | 2 +-
 include/net/xfrm.h         | 2 +-
 include/uapi/linux/audit.h | 1 +
 init/init_task.c           | 2 +-
 kernel/auditsc.c           | 4 ++--
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 75d5b03..5f86f7c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -513,7 +513,7 @@ static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
 }
 static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 {
-	return -1;
+	return AUDIT_SID_UNSET;
 }
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 { }
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a872379..fcce8ee 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -751,7 +751,7 @@ static inline void xfrm_audit_helper_usrinfo(bool task_valid,
 					    audit_get_loginuid(current) :
 					    INVALID_UID);
 	const unsigned int ses = task_valid ? audit_get_sessionid(current) :
-		(unsigned int) -1;
+		AUDIT_SID_UNSET;
 
 	audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses);
 	audit_log_task_context(audit_buf);
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..04f9bd2 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -465,6 +465,7 @@ struct audit_tty_status {
 };
 
 #define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_SID_UNSET ((unsigned int)-1)
 
 /* audit_rule_data supports filter rules with both integer and string
  * fields.  It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/init/init_task.c b/init/init_task.c
index 3ac6e75..c788f91 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -119,7 +119,7 @@ struct task_struct init_task
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDITSYSCALL
 	.loginuid	= INVALID_UID,
-	.sessionid	= (unsigned int)-1,
+	.sessionid	= AUDIT_SID_UNSET,
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f3817d0..6e3ceb9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2050,7 +2050,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
 int audit_set_loginuid(kuid_t loginuid)
 {
 	struct task_struct *task = current;
-	unsigned int oldsessionid, sessionid = (unsigned int)-1;
+	unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET;
 	kuid_t oldloginuid;
 	int rc;
 
@@ -2064,7 +2064,7 @@ int audit_set_loginuid(kuid_t loginuid)
 	/* are we setting or clearing? */
 	if (uid_valid(loginuid)) {
 		sessionid = (unsigned int)atomic_inc_return(&session_id);
-		if (unlikely(sessionid == (unsigned int)-1))
+		if (unlikely(sessionid == AUDIT_SID_UNSET))
 			sessionid = (unsigned int)atomic_inc_return(&session_id);
 	}
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH ghak81 RFC V1 1/5] audit: normalize loginuid read access
From: Richard Guy Briggs @ 2018-05-04 20:54 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list
  Cc: Richard Guy Briggs, David Howells, Ingo Molnar
In-Reply-To: <cover.1525466167.git.rgb@redhat.com>

Recognizing that the loginuid is an internal audit value, use an access
function to retrieve the audit loginuid value for the task rather than
reaching directly into the task struct to get it.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 kernel/auditsc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 479c031..f3817d0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -374,7 +374,7 @@ static int audit_field_compare(struct task_struct *tsk,
 	case AUDIT_COMPARE_EGID_TO_OBJ_GID:
 		return audit_compare_gid(cred->egid, name, f, ctx);
 	case AUDIT_COMPARE_AUID_TO_OBJ_UID:
-		return audit_compare_uid(tsk->loginuid, name, f, ctx);
+		return audit_compare_uid(audit_get_loginuid(tsk), name, f, ctx);
 	case AUDIT_COMPARE_SUID_TO_OBJ_UID:
 		return audit_compare_uid(cred->suid, name, f, ctx);
 	case AUDIT_COMPARE_SGID_TO_OBJ_GID:
@@ -385,7 +385,7 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_gid(cred->fsgid, name, f, ctx);
 	/* uid comparisons */
 	case AUDIT_COMPARE_UID_TO_AUID:
-		return audit_uid_comparator(cred->uid, f->op, tsk->loginuid);
+		return audit_uid_comparator(cred->uid, f->op, audit_get_loginuid(tsk));
 	case AUDIT_COMPARE_UID_TO_EUID:
 		return audit_uid_comparator(cred->uid, f->op, cred->euid);
 	case AUDIT_COMPARE_UID_TO_SUID:
@@ -394,11 +394,11 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_uid_comparator(cred->uid, f->op, cred->fsuid);
 	/* auid comparisons */
 	case AUDIT_COMPARE_AUID_TO_EUID:
-		return audit_uid_comparator(tsk->loginuid, f->op, cred->euid);
+		return audit_uid_comparator(audit_get_loginuid(tsk), f->op, cred->euid);
 	case AUDIT_COMPARE_AUID_TO_SUID:
-		return audit_uid_comparator(tsk->loginuid, f->op, cred->suid);
+		return audit_uid_comparator(audit_get_loginuid(tsk), f->op, cred->suid);
 	case AUDIT_COMPARE_AUID_TO_FSUID:
-		return audit_uid_comparator(tsk->loginuid, f->op, cred->fsuid);
+		return audit_uid_comparator(audit_get_loginuid(tsk), f->op, cred->fsuid);
 	/* euid comparisons */
 	case AUDIT_COMPARE_EUID_TO_SUID:
 		return audit_uid_comparator(cred->euid, f->op, cred->suid);
@@ -611,7 +611,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 				result = match_tree_refs(ctx, rule->tree);
 			break;
 		case AUDIT_LOGINUID:
-			result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
+			result = audit_uid_comparator(audit_get_loginuid(tsk), f->op, f->uid);
 			break;
 		case AUDIT_LOGINUID_SET:
 			result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
@@ -2287,8 +2287,8 @@ int audit_signal_info(int sig, struct task_struct *t)
 	    (sig == SIGTERM || sig == SIGHUP ||
 	     sig == SIGUSR1 || sig == SIGUSR2)) {
 		audit_sig_pid = task_tgid_nr(tsk);
-		if (uid_valid(tsk->loginuid))
-			audit_sig_uid = tsk->loginuid;
+		if (uid_valid(audit_get_loginuid(tsk)))
+			audit_sig_uid = audit_get_loginuid(tsk);
 		else
 			audit_sig_uid = uid;
 		security_task_getsecid(tsk, &audit_sig_sid);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH ghak81 RFC V1 0/5] audit: group task params
From: Richard Guy Briggs @ 2018-05-04 20:54 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML,
	Linux NetDev Upstream Mailing List, Netfilter Devel List,
	Linux Security Module list, Integrity Measurement Architecture,
	SElinux list
  Cc: Eric Paris, Paul Moore, Steve Grubb, Ingo Molnar, David Howells,
	Richard Guy Briggs

Group the audit parameters for each task into one structure.
In particular, remove the loginuid and sessionid values and the audit
context pointer from the task structure, replacing them with an audit
task information structure to contain them.  Use access functions to
access audit values.

Note:  Use static allocation of the audit task information structure
initially.  Dynamic allocation was considered and attempted, but isn't
ready yet.  Static allocation has the limitation that future audit task
information structure changes would cause a visible change to the rest
of the kernel, whereas dynamic allocation would mostly hide any future
changes.

The first four access normalization patches could stand alone.

Passes audit-testsuite.

Richard Guy Briggs (5):
  audit: normalize loginuid read access
  audit: convert sessionid unset to a macro
  audit: use inline function to get audit context
  audit: use inline function to set audit context
  audit: collect audit task parameters

 MAINTAINERS                          |  2 +-
 include/linux/audit.h                | 30 ++++++++++---
 include/linux/audit_task.h           | 31 ++++++++++++++
 include/linux/sched.h                |  6 +--
 include/net/xfrm.h                   |  4 +-
 include/uapi/linux/audit.h           |  1 +
 init/init_task.c                     |  8 +++-
 kernel/audit.c                       |  4 +-
 kernel/audit_watch.c                 |  2 +-
 kernel/auditsc.c                     | 82 ++++++++++++++++++------------------
 kernel/fork.c                        |  2 +-
 net/bridge/netfilter/ebtables.c      |  2 +-
 net/core/dev.c                       |  2 +-
 net/netfilter/x_tables.c             |  2 +-
 net/netlabel/netlabel_user.c         |  2 +-
 security/integrity/ima/ima_api.c     |  2 +-
 security/integrity/integrity_audit.c |  2 +-
 security/lsm_audit.c                 |  2 +-
 security/selinux/hooks.c             |  4 +-
 security/selinux/selinuxfs.c         |  6 +--
 security/selinux/ss/services.c       | 12 +++---
 21 files changed, 129 insertions(+), 79 deletions(-)
 create mode 100644 include/linux/audit_task.h

-- 
1.8.3.1

^ permalink raw reply

* [PATCH net-next] net/ipv6: rename rt6_next to fib6_next
From: David Ahern @ 2018-05-04 20:54 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

This slipped through the cracks in the followup set to the fib6_info flip.
Rename rt6_next to fib6_next.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  6 +++---
 net/ipv6/ip6_fib.c    | 26 +++++++++++++-------------
 net/ipv6/route.c      | 12 ++++++------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1af450d4e923..a3ec08d05756 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -135,7 +135,7 @@ struct fib6_nh {
 
 struct fib6_info {
 	struct fib6_table		*fib6_table;
-	struct fib6_info __rcu		*rt6_next;
+	struct fib6_info __rcu		*fib6_next;
 	struct fib6_node __rcu		*fib6_node;
 
 	/* Multipath routes:
@@ -192,11 +192,11 @@ struct rt6_info {
 
 #define for_each_fib6_node_rt_rcu(fn)					\
 	for (rt = rcu_dereference((fn)->leaf); rt;			\
-	     rt = rcu_dereference(rt->rt6_next))
+	     rt = rcu_dereference(rt->fib6_next))
 
 #define for_each_fib6_walker_rt(w)					\
 	for (rt = (w)->leaf; rt;					\
-	     rt = rcu_dereference_protected(rt->rt6_next, 1))
+	     rt = rcu_dereference_protected(rt->fib6_next, 1))
 
 static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6421c893466e..f0a4262a4789 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -945,7 +945,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 	ins = &fn->leaf;
 
 	for (iter = leaf; iter;
-	     iter = rcu_dereference_protected(iter->rt6_next,
+	     iter = rcu_dereference_protected(iter->fib6_next,
 				lockdep_is_held(&rt->fib6_table->tb6_lock))) {
 		/*
 		 *	Search for duplicates
@@ -1002,7 +1002,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 			break;
 
 next_iter:
-		ins = &iter->rt6_next;
+		ins = &iter->fib6_next;
 	}
 
 	if (fallback_ins && !found) {
@@ -1031,7 +1031,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 					      &sibling->fib6_siblings);
 				break;
 			}
-			sibling = rcu_dereference_protected(sibling->rt6_next,
+			sibling = rcu_dereference_protected(sibling->fib6_next,
 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 		}
 		/* For each sibling in the list, increment the counter of
@@ -1065,7 +1065,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 		if (err)
 			return err;
 
-		rcu_assign_pointer(rt->rt6_next, iter);
+		rcu_assign_pointer(rt->fib6_next, iter);
 		atomic_inc(&rt->fib6_ref);
 		rcu_assign_pointer(rt->fib6_node, fn);
 		rcu_assign_pointer(*ins, rt);
@@ -1096,7 +1096,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
 		atomic_inc(&rt->fib6_ref);
 		rcu_assign_pointer(rt->fib6_node, fn);
-		rt->rt6_next = iter->rt6_next;
+		rt->fib6_next = iter->fib6_next;
 		rcu_assign_pointer(*ins, rt);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
@@ -1113,14 +1113,14 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
-			ins = &rt->rt6_next;
+			ins = &rt->fib6_next;
 			iter = rcu_dereference_protected(*ins,
 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 			while (iter) {
 				if (iter->fib6_metric > rt->fib6_metric)
 					break;
 				if (rt6_qualify_for_ecmp(iter)) {
-					*ins = iter->rt6_next;
+					*ins = iter->fib6_next;
 					iter->fib6_node = NULL;
 					fib6_purge_rt(iter, fn, info->nl_net);
 					if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1129,7 +1129,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 					nsiblings--;
 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
-					ins = &iter->rt6_next;
+					ins = &iter->fib6_next;
 				}
 				iter = rcu_dereference_protected(*ins,
 					lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -1712,7 +1712,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	RT6_TRACE("fib6_del_route\n");
 
 	/* Unlink it */
-	*rtp = rt->rt6_next;
+	*rtp = rt->fib6_next;
 	rt->fib6_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1741,7 +1741,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	FOR_WALKERS(net, w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rcu_dereference_protected(rt->rt6_next,
+			w->leaf = rcu_dereference_protected(rt->fib6_next,
 					    lockdep_is_held(&table->tb6_lock));
 			if (!w->leaf)
 				w->state = FWS_U;
@@ -1795,7 +1795,7 @@ int fib6_del(struct fib6_info *rt, struct nl_info *info)
 			fib6_del_route(table, fn, rtp, info);
 			return 0;
 		}
-		rtp_next = &cur->rt6_next;
+		rtp_next = &cur->fib6_next;
 	}
 	return -ENOENT;
 }
@@ -2279,7 +2279,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
 
 	do {
 		iter->w.leaf = rcu_dereference_protected(
-				iter->w.leaf->rt6_next,
+				iter->w.leaf->fib6_next,
 				lockdep_is_held(&iter->tbl->tb6_lock));
 		iter->skip--;
 		if (!iter->skip && iter->w.leaf)
@@ -2345,7 +2345,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (!v)
 		goto iter_table;
 
-	n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next);
+	n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
 	if (n) {
 		++*pos;
 		return n;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8ed1b519c2b0..daa3662da0ee 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -468,7 +468,7 @@ static inline struct fib6_info *rt6_device_match(struct net *net,
 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
 		return rt;
 
-	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
+	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
 		const struct net_device *dev = sprt->fib6_nh.nh_dev;
 
 		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
@@ -696,7 +696,7 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
 
 	match = NULL;
 	cont = NULL;
-	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
+	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
 		if (rt->fib6_metric != metric) {
 			cont = rt;
 			break;
@@ -706,7 +706,7 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
 	}
 
 	for (rt = leaf; rt && rt != rr_head;
-	     rt = rcu_dereference(rt->rt6_next)) {
+	     rt = rcu_dereference(rt->fib6_next)) {
 		if (rt->fib6_metric != metric) {
 			cont = rt;
 			break;
@@ -718,7 +718,7 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
 	if (match || !cont)
 		return match;
 
-	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
+	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 
 	return match;
@@ -756,7 +756,7 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
 			     &do_rr);
 
 	if (do_rr) {
-		struct fib6_info *next = rcu_dereference(rt0->rt6_next);
+		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 
 		/* no entries matched; do round-robin */
 		if (!next || next->fib6_metric != rt0->fib6_metric)
@@ -3781,7 +3781,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 		if (iter->fib6_metric == rt->fib6_metric &&
 		    rt6_qualify_for_ecmp(iter))
 			return iter;
-		iter = rcu_dereference_protected(iter->rt6_next,
+		iter = rcu_dereference_protected(iter->fib6_next,
 				lockdep_is_held(&rt->fib6_table->tb6_lock));
 	}
 
-- 
2.11.0

^ permalink raw reply related

* Re: [net-next PATCH v2 4/8] udp: Do not pass checksum as a parameter to GSO segmentation
From: Eric Dumazet @ 2018-05-04 20:19 UTC (permalink / raw)
  To: Alexander Duyck, netdev, willemb, davem
In-Reply-To: <20180504182958.5194.62398.stgit@localhost.localdomain>



On 05/04/2018 11:30 AM, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This patch is meant to allow us to avoid having to recompute the checksum
> from scratch and have it passed as a parameter.
> 
> Instead of taking that approach we can take advantage of the fact that the
> length that was used to compute the existing checksum is included in the
> UDP header. If we cancel that out by adding the value XOR with 0xFFFF we
> can then just add the new length in and fold that into the new result.
> 

>  
> +	uh = udp_hdr(segs);
> +
> +	/* compute checksum adjustment based on old length versus new */
> +	newlen = htons(sizeof(*uh) + mss);
> +	check = ~csum_fold((__force __wsum)((__force u32)uh->check +
> +					    ((__force u32)uh->len ^ 0xFFFF) +
> +					    (__force u32)newlen));
> +


Can't this use csum_sub() instead of this ^ 0xFFFF trick ?

^ permalink raw reply

* Product Inquiry
From: Gerhard Kahmann @ 2018-05-05  3:17 UTC (permalink / raw)


?Dear Sir,

We recently visited your website, we were recommended by one of your customer and we are interested in your models, We will like to place an order from the list of your products. However, we would like to see your company's latest catalogs with the; minimum order quantity, delivery time/FOB, payment terms etc. Official order placement will follow as soon as possible.

Awaiting your prompt reply

Best Regards
Gerhard Kahmann
Purchasing Dept
*****************************

^ permalink raw reply

* Re: [RFC PATCH 2/2] net: mac808211: mac802154: use lockdep_assert_in_softirq() instead own warning
From: Peter Zijlstra @ 2018-05-04 20:09 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, tglx, Ingo Molnar, David S. Miller, Johannes Berg,
	Alexander Aring, Stefan Schmidt, netdev, linux-wireless,
	linux-wpan, Anna-Maria Gleixner
In-Reply-To: <20180504190735.izmzibhb66gjb5wr@linutronix.de>

On Fri, May 04, 2018 at 09:07:35PM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-05-04 20:51:32 [+0200], Peter Zijlstra wrote:

> > softirqs disabled, ack that is exactly what it checks.
> > 
> > But afaict the assertion you introduced tests that we are _in_ softirq
> > context, which is not the same.
> 
> indeed, now it clicked. Given what I wrote in the cover letter would you
> be in favour of (a proper) lockdep_assert_BH_disabled() or the cheaper
> local_bh_enable() (assuming the network folks don't mind the cheaper
> version)?

Depends a bit on what the code wants I suppose. If the code is in fact
fine with the stronger in-softirq assertion, that'd be best. Otherwise I
don't object to a lockdep_assert_bh_disabled() to accompany the
lockdep_assert_irq_disabled() we already have either.

^ permalink raw reply

* Re: [net-next PATCH v2 3/8] udp: Do not pass MSS as parameter to GSO segmentation
From: Eric Dumazet @ 2018-05-04 20:08 UTC (permalink / raw)
  To: Alexander Duyck, netdev, willemb, davem
In-Reply-To: <20180504182941.5194.49667.stgit@localhost.localdomain>



On 05/04/2018 11:29 AM, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> There is no point in passing MSS as a parameter for for the GSO
> segmentation call as it is already available via the shared info for the
> skb itself.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> ---

Reviewed-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [net-next PATCH v2 2/8] udp: Verify that pulling UDP header in GSO segmentation doesn't fail
From: Eric Dumazet @ 2018-05-04 20:07 UTC (permalink / raw)
  To: Alexander Duyck, netdev, willemb, davem
In-Reply-To: <20180504182857.5194.45504.stgit@localhost.localdomain>



On 05/04/2018 11:29 AM, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> We should verify that we can pull the UDP header before we attempt to do
> so. Otherwise if this fails we have no way of knowing and GSO will not work
> correctly.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> ---

Reviewed-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [PATCH v5 0/6] firmware_loader: cleanups for v4.18
From: Luis R. Rodriguez @ 2018-05-04 19:58 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: Luis R. Rodriguez, gregkh, akpm, keescook, josh, teg, wagi,
	hdegoede, andresx7, zohar, kubakici, shuah, mfuzzey, dhowells,
	pali.rohar, tiwai, kvalo, arend.vanspriel, zajec5, nbroeking,
	markivx, broonie, dmitry.torokhov, dwmw2, torvalds, Abhay_Salunke,
	bjorn.andersson, jewalt, oneukum, cantabile.desu, ast, hare, jejb,
	martin.petersen, davem, maco
In-Reply-To: <m3lgczcjt7.fsf@pm.waw.pl>

On Fri, May 04, 2018 at 09:17:08PM +0200, Krzysztof Halasa wrote:
> "Luis R. Rodriguez" <mcgrof@kernel.org> writes:
> 
> >   * CONFIG_WANXL --> CONFIG_WANXL_BUILD_FIRMWARE
> >   * CONFIG_SCSI_AIC79XX --> CONFIG_AIC79XX_BUILD_FIRMWARE
> >
> > To this day both of these drivers are building driver *firmwares* when
> > the option CONFIG_PREVENT_FIRMWARE_BUILD is disabled, and they don't
> > even make use of the firmware API at all.
> 
> Don't know for sure about Adaptec, but wanXL firmware fits every
> definition of "stable". BTW it's a 1997 or so early PCI card, built
> around the PLX PCI9060 bus mastering PCI bridge and Motorola 68360
> (the original QUICC) processor. Maximum bit rate of 2 Mb/s on each sync
> serial port.

So we can nuke CONFIG_WANXL_BUILD_FIRMWARE now?

> It's more about delivering the .S source for the firmware, I guess.
> Nobody is expected to build it. The fw is about 2.5 KB and is directly
> linked with the driver.

:P Future work I guess would be to just use the firmware API and stuff
it into linux-firmware?

  Luis

^ permalink raw reply

* Re: [PATCH v2 net-next 1/4] umh: introduce fork_usermode_blob() helper
From: Luis R. Rodriguez @ 2018-05-04 19:56 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: davem, daniel, torvalds, gregkh, luto, netdev, linux-kernel,
	kernel-team, Al Viro, David Howells, Mimi Zohar, Kees Cook,
	Andrew Morton, Dominik Brodowski, Hugh Dickins, Jani Nikula,
	Joonas Lahtinen, Rodrigo Vivi, David Airlie, Rafael J. Wysocki,
	Luis R. Rodriguez, Linux FS Devel
In-Reply-To: <20180503043604.1604587-2-ast@kernel.org>

What a mighty short list of reviewers. Adding some more. My review below.
I'd appreciate a Cc on future versions of these patches.

On Wed, May 02, 2018 at 09:36:01PM -0700, Alexei Starovoitov wrote:
> Introduce helper:
> int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
> struct umh_info {
>        struct file *pipe_to_umh;
>        struct file *pipe_from_umh;
>        pid_t pid;
> };
> 
> that GPLed kernel modules (signed or unsigned) can use it to execute part
> of its own data as swappable user mode process.
> 
> The kernel will do:
> - mount "tmpfs"

Actually its a *shared* vfsmount tmpfs for all umh blobs.

> - allocate a unique file in tmpfs
> - populate that file with [data, data + len] bytes
> - user-mode-helper code will do_execve that file and, before the process
>   starts, the kernel will create two unix pipes for bidirectional
>   communication between kernel module and umh
> - close tmpfs file, effectively deleting it
> - the fork_usermode_blob will return zero on success and populate
>   'struct umh_info' with two unix pipes and the pid of the user process

But since its using UMH_WAIT_EXEC, all we can guarantee currently is the
inception point was intended, well though out, and will run, but the return
value in no way reflects the success or not of the execution. More below.

> As the first step in the development of the bpfilter project
> the fork_usermode_blob() helper is introduced to allow user mode code
> to be invoked from a kernel module. The idea is that user mode code plus
> normal kernel module code are built as part of the kernel build
> and installed as traditional kernel module into distro specified location,
> such that from a distribution point of view, there is
> no difference between regular kernel modules and kernel modules + umh code.
> Such modules can be signed, modprobed, rmmod, etc. The use of this new helper
> by a kernel module doesn't make it any special from kernel and user space
> tooling point of view.
> 
> Such approach enables kernel to delegate functionality traditionally done
> by the kernel modules into the user space processes (either root or !root) and
> reduces security attack surface of the new code. The buggy umh code would crash
> the user process, but not the kernel. Another advantage is that umh code
> of the kernel module can be debugged and tested out of user space
> (e.g. opening the possibility to run clang sanitizers, fuzzers or
> user space test suites on the umh code).
> In case of the bpfilter project such architecture allows complex control plane
> to be done in the user space while bpf based data plane stays in the kernel.
> 
> Since umh can crash, can be oom-ed by the kernel, killed by the admin,
> the kernel module that uses them (like bpfilter) needs to manage life
> time of umh on its own via two unix pipes and the pid of umh.
> 
> The exit code of such kernel module should kill the umh it started,
> so that rmmod of the kernel module will cleanup the corresponding umh.
> Just like if the kernel module does kmalloc() it should kfree() it in the exit code.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  fs/exec.c               |  38 ++++++++---
>  include/linux/binfmts.h |   1 +
>  include/linux/umh.h     |  12 ++++
>  kernel/umh.c            | 176 +++++++++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 215 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 183059c427b9..30a36c2a39bf 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm)
>  /*
>   * sys_execve() executes a new program.
>   */
> -static int do_execveat_common(int fd, struct filename *filename,
> -			      struct user_arg_ptr argv,
> -			      struct user_arg_ptr envp,
> -			      int flags)
> +static int __do_execve_file(int fd, struct filename *filename,
> +			    struct user_arg_ptr argv,
> +			    struct user_arg_ptr envp,
> +			    int flags, struct file *file)
>  {
>  	char *pathbuf = NULL;
>  	struct linux_binprm *bprm;
> -	struct file *file;
>  	struct files_struct *displaced;
>  	int retval;

Keeping in mind a fuzzer...

Note, right below this, and not shown here in the hunk, is:

        if (IS_ERR(filename))                                                   
                return PTR_ERR(filename)
>  
> @@ -1752,7 +1751,8 @@ static int do_execveat_common(int fd, struct filename *filename,
>  	check_unsafe_exec(bprm);
>  	current->in_execve = 1;
>  
> -	file = do_open_execat(fd, filename, flags);
> +	if (!file)
> +		file = do_open_execat(fd, filename, flags);


Here we now seem to allow !file and open the file with the passed fd as in
the old days. This is an expected change.

>  	retval = PTR_ERR(file);
>  	if (IS_ERR(file))
>  		goto out_unmark;
> @@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename,
>  	sched_exec();
>  
>  	bprm->file = file;
> -	if (fd == AT_FDCWD || filename->name[0] == '/') {
> +	if (!filename) {

If anything shouldn't this be:

	if (IS_ERR(filename))

But, wouldn't the above first branch in the routine catch this?

> +		bprm->filename = "none";

Given this seems like a desirable branch which was tested, wonder how this
ever got set if the above branch in the first hunk I noted hit true?

In any case, we seem to have two cases, can we rule out the exact requirements
at the top so we can bail out with an error code if one or the other way to
call this function does not align with expectations?

> +	} else if (fd == AT_FDCWD || filename->name[0] == '/') {
>  		bprm->filename = filename->name;
>  	} else {
>  		if (filename->name[0] == '\0')
> @@ -1826,7 +1828,8 @@ static int do_execveat_common(int fd, struct filename *filename,
>  	task_numa_free(current);
>  	free_bprm(bprm);
>  	kfree(pathbuf);
> -	putname(filename);
> +	if (filename)
> +		putname(filename);
>  	if (displaced)
>  		put_files_struct(displaced);
>  	return retval;
> @@ -1849,10 +1852,27 @@ static int do_execveat_common(int fd, struct filename *filename,
>  	if (displaced)
>  		reset_files_struct(displaced);
>  out_ret:
> -	putname(filename);
> +	if (filename)
> +		putname(filename);
>  	return retval;
>  }
>  
> +static int do_execveat_common(int fd, struct filename *filename,

Further signs the filename is now optional. But I don't understand how these
branches ever be true, but perhaps I'm missing something?

> +			      struct user_arg_ptr argv,
> +			      struct user_arg_ptr envp,
> +			      int flags)
> +{
> +	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
> +}
> +
> +int do_execve_file(struct file *file, void *__argv, void *__envp)
> +{
> +	struct user_arg_ptr argv = { .ptr.native = __argv };
> +	struct user_arg_ptr envp = { .ptr.native = __envp };
> +
> +	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
> +}

Or maybe do the semantics expectations checks here, so we don't clutter
do_execveat_common() with them?

> +
>  int do_execve(struct filename *filename,
>  	const char __user *const __user *__argv,
>  	const char __user *const __user *__envp)
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index 4955e0863b83..c05f24fac4f6 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -150,5 +150,6 @@ extern int do_execveat(int, struct filename *,
>  		       const char __user * const __user *,
>  		       const char __user * const __user *,
>  		       int);
> +int do_execve_file(struct file *file, void *__argv, void *__envp);
>  
>  #endif /* _LINUX_BINFMTS_H */
> diff --git a/include/linux/umh.h b/include/linux/umh.h
> index 244aff638220..5c812acbb80a 100644
> --- a/include/linux/umh.h
> +++ b/include/linux/umh.h
> @@ -22,8 +22,10 @@ struct subprocess_info {
>  	const char *path;
>  	char **argv;
>  	char **envp;
> +	struct file *file;
>  	int wait;
>  	int retval;
> +	pid_t pid;
>  	int (*init)(struct subprocess_info *info, struct cred *new);
>  	void (*cleanup)(struct subprocess_info *info);
>  	void *data;

While at it, can you kdocify struct subprocess_info and add new docs for at
least these two entires you are adding ?

> @@ -38,6 +40,16 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
>  			  int (*init)(struct subprocess_info *info, struct cred *new),
>  			  void (*cleanup)(struct subprocess_info *), void *data);
>  
> +struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
> +			  int (*init)(struct subprocess_info *info, struct cred *new),
> +			  void (*cleanup)(struct subprocess_info *), void *data);

Likewise but on the umc.c file.

> +struct umh_info {
> +	struct file *pipe_to_umh;
> +	struct file *pipe_from_umh;
> +	pid_t pid;
> +};

Likewise.

> +int fork_usermode_blob(void *data, size_t len, struct umh_info *info);

Likewise but on the umc.c files.

> +
>  extern int
>  call_usermodehelper_exec(struct subprocess_info *info, int wait);
>  
> diff --git a/kernel/umh.c b/kernel/umh.c
> index f76b3ff876cf..c3f418d7d51a 100644
> --- a/kernel/umh.c
> +++ b/kernel/umh.c
> @@ -25,6 +25,8 @@
>  #include <linux/ptrace.h>
>  #include <linux/async.h>
>  #include <linux/uaccess.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/pipe_fs_i.h>
>  
>  #include <trace/events/module.h>
>  
> @@ -97,9 +99,13 @@ static int call_usermodehelper_exec_async(void *data)
>  
>  	commit_creds(new);
>  
> -	retval = do_execve(getname_kernel(sub_info->path),
> -			   (const char __user *const __user *)sub_info->argv,
> -			   (const char __user *const __user *)sub_info->envp);
> +	if (sub_info->file)
> +		retval = do_execve_file(sub_info->file,
> +					sub_info->argv, sub_info->envp);
> +	else
> +		retval = do_execve(getname_kernel(sub_info->path),
> +				   (const char __user *const __user *)sub_info->argv,
> +				   (const char __user *const __user *)sub_info->envp);
>  out:
>  	sub_info->retval = retval;
>  	/*
> @@ -185,6 +191,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
>  		if (pid < 0) {
>  			sub_info->retval = pid;
>  			umh_complete(sub_info);
> +		} else {
> +			sub_info->pid = pid;
>  		}
>  	}
>  }
> @@ -393,6 +401,168 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
>  }
>  EXPORT_SYMBOL(call_usermodehelper_setup);
>  
> +struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
> +		int (*init)(struct subprocess_info *info, struct cred *new),
> +		void (*cleanup)(struct subprocess_info *info), void *data)

Should be static, no other users outside of this file.
Please use umh_setup_file().

> +{
> +	struct subprocess_info *sub_info;

Considering a possible fuzzer triggering random data we should probably
return NULL early and avoid the kzalloc if:

	if (!file || !init || !cleanup)
		return NULL;

Is data optional? The kdoc could clarify this.


> +
> +	sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
> +	if (!sub_info)
> +		return NULL;
> +
> +	INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
> +	sub_info->path = "none";
> +	sub_info->file = file;
> +	sub_info->init = init;
> +	sub_info->cleanup = cleanup;
> +	sub_info->data = data;
> +	return sub_info;
> +}
> +
> +static struct vfsmount *umh_fs;
> +
> +static int init_tmpfs(void)

Please use umh_init_tmpfs(). Also see init/main.c do_basic_setup() which calls
usermodehelper_enable() prior to do_initcalls(). Now, fortunately TMPFS is only
bool, saving us from some races and we do call tmpfs's init first shmem_init():

static void __init do_basic_setup(void)
{
	cpuset_init_smp();
	shmem_init();
	driver_init();
	init_irq_proc();
	do_ctors();
	usermodehelper_enable();
	do_initcalls();
}

But it also means we're enabling your new call call fork_usermode_blob() on
early init code even if we're not setup. Since this umh tmpfs vfsmount is
shared I'd say just call this init right before usermodehelper_enable()
on do_basic_setup().

> +{
> +	struct file_system_type *type;
> +
> +	if (umh_fs)
> +		return 0;
> +	type = get_fs_type("tmpfs");
> +	if (!type)
> +		return -ENODEV;
> +	umh_fs = kern_mount(type);
> +	if (IS_ERR(umh_fs)) {
> +		int err = PTR_ERR(umh_fs);
> +
> +		put_filesystem(type);
> +		umh_fs = NULL;
> +		return err;
> +	}
> +	return 0;
> +}
> +
> +static int alloc_tmpfs_file(size_t size, struct file **filp)

Please use umh_alloc_tmpfs_file()

> +{
> +	struct file *file;
> +	int err;
> +
> +	err = init_tmpfs();
> +	if (err)
> +		return err;
> +	file = shmem_file_setup_with_mnt(umh_fs, "umh", size, VM_NORESERVE);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +	*filp = file;
> +	return 0;
> +}
> +
> +static int populate_file(struct file *file, const void *data, size_t size)

Please use umh_populate_file()

> +{
> +	size_t offset = 0;
> +	int err;
> +
> +	do {
> +		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
> +		struct page *page;
> +		void *pgdata, *vaddr;
> +
> +		err = pagecache_write_begin(file, file->f_mapping, offset, len,
> +					    0, &page, &pgdata);
> +		if (err < 0)
> +			goto fail;
> +
> +		vaddr = kmap(page);
> +		memcpy(vaddr, data, len);
> +		kunmap(page);
> +
> +		err = pagecache_write_end(file, file->f_mapping, offset, len,
> +					  len, page, pgdata);
> +		if (err < 0)
> +			goto fail;
> +
> +		size -= len;
> +		data += len;
> +		offset += len;
> +	} while (size);

Character for character, this looks like a wonderful copy and paste from
i915_gem_object_create_from_data()'s own loop which does the same exact
thing. Perhaps its time for a helper on mm/filemap.c with an export so
if a bug is fixed in one place its fixed in both places.

> +	return 0;
> +fail:
> +	return err;
> +}
> +
> +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)

The function name umh_pipe_setup() is also used on fs/coredump.c, with the same
prototype, perhaps rename that before we take this on, even if both are static.

> +{
> +	struct umh_info *umh_info = info->data;
> +	struct file *from_umh[2];
> +	struct file *to_umh[2];
> +	int err;
> +
> +	/* create pipe to send data to umh */
> +	err = create_pipe_files(to_umh, 0);
> +	if (err)
> +		return err;
> +	err = replace_fd(0, to_umh[0], 0);
> +	fput(to_umh[0]);
> +	if (err < 0) {
> +		fput(to_umh[1]);
> +		return err;
> +	}
> +
> +	/* create pipe to receive data from umh */
> +	err = create_pipe_files(from_umh, 0);
> +	if (err) {
> +		fput(to_umh[1]);
> +		replace_fd(0, NULL, 0);
> +		return err;
> +	}
> +	err = replace_fd(1, from_umh[1], 0);
> +	fput(from_umh[1]);
> +	if (err < 0) {
> +		fput(to_umh[1]);
> +		replace_fd(0, NULL, 0);
> +		fput(from_umh[0]);
> +		return err;
> +	}
> +
> +	umh_info->pipe_to_umh = to_umh[1];
> +	umh_info->pipe_from_umh = from_umh[0];
> +	return 0;
> +}
> +
> +static void umh_save_pid(struct subprocess_info *info)
> +{
> +	struct umh_info *umh_info = info->data;
> +
> +	umh_info->pid = info->pid;
> +}
> +
> +int fork_usermode_blob(void *data, size_t len, struct umh_info *info)

Please use umh_fork_blob()

> +{
> +	struct subprocess_info *sub_info;
> +	struct file *file = NULL;
> +	int err;
> +
> +	err = alloc_tmpfs_file(len, &file);
> +	if (err)
> +		return err;
> +
> +	err = populate_file(file, data, len);
> +	if (err)
> +		goto out;
> +
> +	err = -ENOMEM;
> +	sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
> +						  umh_save_pid, info);
> +	if (!sub_info)
> +		goto out;
> +
> +	err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);

Alright, neat, so to be clear, we're just glad to try inception, we have no
clue or idea what the real return value would be, its up to the caller to track
the progress somehow?

Can you add a kdoc entry for this and clarify requirements?

Also, can you extend lib/test_kmod.c with a test case for this with its own
demo and try to blow it up?

I hadn't tried suspend/resume during a kmod test, but since we're using a
kernel_thread() I wouldn't be surprised if we barf while stress testing the
module loader. Its surely a corner case, but better mention that now than cry
later if we get heavy umh modules and all of a sudden we start using this for
whatever reason close to suspend.

  Luis

> +out:
> +	fput(file);
> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(fork_usermode_blob);
> +
>  /**
>   * call_usermodehelper_exec - start a usermode application
>   * @sub_info: information about the subprocessa
> -- 
> 2.9.5


-- 
Do not panic

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox