Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next 2/3] bpf: implement CAP_BPF
From: Alexei Starovoitov @ 2019-08-29  5:12 UTC (permalink / raw)
  To: luto; +Cc: davem, peterz, rostedt, netdev, bpf, kernel-team, linux-api
In-Reply-To: <20190829051253.1927291-1-ast@kernel.org>

Implement permissions as stated in uapi/linux/capability.h

Note that CAP_SYS_ADMIN is replaced with CAP_BPF.
Existing applications that use BPF do not drop all caps,
keeping only CAP_SYS_ADMIN, before doing the bpf() syscall.
Hence it's highly unlikely that existing code will break.
If there are reports of breakage then CAP_SYS_ADMIN
would be allowed as well with an "its usage is deprecated" message,
similar to commit ee24aebffb75 ("cap_syslog: accept CAP_SYS_ADMIN for now").

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h                      |  1 +
 kernel/bpf/arraymap.c                       |  2 +-
 kernel/bpf/cgroup.c                         |  2 +-
 kernel/bpf/core.c                           |  9 +++-
 kernel/bpf/hashtab.c                        |  4 +-
 kernel/bpf/lpm_trie.c                       |  2 +-
 kernel/bpf/queue_stack_maps.c               |  2 +-
 kernel/bpf/reuseport_array.c                |  2 +-
 kernel/bpf/stackmap.c                       |  2 +-
 kernel/bpf/syscall.c                        | 32 ++++++++------
 kernel/bpf/verifier.c                       |  4 +-
 kernel/trace/bpf_trace.c                    |  2 +-
 net/core/bpf_sk_storage.c                   |  2 +-
 net/core/filter.c                           | 10 +++--
 tools/testing/selftests/bpf/test_verifier.c | 46 +++++++++++++++++----
 15 files changed, 83 insertions(+), 39 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 92c6e31fb008..16cea50af014 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -857,6 +857,7 @@ static inline bool bpf_dump_raw_ok(void)
 	return kallsyms_show_value() == 1;
 }
 
+bool cap_bpf_tracing(void);
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..045e30b7160d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -73,7 +73,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int ret, numa_node = bpf_map_attr_numa_node(attr);
 	u32 elem_size, index_mask, max_entries;
-	bool unpriv = !capable(CAP_SYS_ADMIN);
+	bool unpriv = !capable(CAP_BPF);
 	u64 cost, array_size, mask64;
 	struct bpf_map_memory mem;
 	struct bpf_array *array;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6a6a154cfa7b..97f733354421 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -795,7 +795,7 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
 	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
+		if (cap_bpf_tracing())
 			return bpf_get_trace_printk_proto();
 		/* fall through */
 	default:
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8191a7db2777..16ed80835156 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -646,7 +646,7 @@ static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 {
 	if (!bpf_prog_kallsyms_candidate(fp) ||
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_BPF))
 		return;
 
 	spin_lock_bh(&bpf_lock);
@@ -768,7 +768,7 @@ static int bpf_jit_charge_modmem(u32 pages)
 {
 	if (atomic_long_add_return(pages, &bpf_jit_current) >
 	    (bpf_jit_limit >> PAGE_SHIFT)) {
-		if (!capable(CAP_SYS_ADMIN)) {
+		if (!capable(CAP_BPF)) {
 			atomic_long_sub(pages, &bpf_jit_current);
 			return -EPERM;
 		}
@@ -2104,6 +2104,11 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 EXPORT_SYMBOL(bpf_stats_enabled_key);
 
+bool cap_bpf_tracing(void)
+{
+	return capable(CAP_BPF) && capable(CAP_TRACING);
+}
+
 /* All definitions of tracepoints related to BPF. */
 #define CREATE_TRACE_POINTS
 #include <linux/bpf_trace.h>
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 22066a62c8c9..f459315625ac 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -244,9 +244,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
 
-	if (lru && !capable(CAP_SYS_ADMIN))
+	if (lru && !capable(CAP_BPF))
 		/* LRU implementation is much complicated than other
-		 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
+		 * maps.  Hence, limit to CAP_BPF.
 		 */
 		return -EPERM;
 
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 56e6c75d354d..a45fa5464d98 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -543,7 +543,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	u64 cost = sizeof(*trie), cost_per_node;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return ERR_PTR(-EPERM);
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index f697647ceb54..ca0ba9edca86 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -45,7 +45,7 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 50c083ba978c..bfad7d41a061 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -154,7 +154,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
 	struct bpf_map_memory mem;
 	u64 array_size;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return ERR_PTR(-EPERM);
 
 	array_size = sizeof(*array);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 052580c33d26..beaff32fccc5 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -90,7 +90,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	u64 cost, n_buckets;
 	int err;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_TRACING))
 		return ERR_PTR(-EPERM);
 
 	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c0f62fd67c6b..ef7b06ca30e5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1176,7 +1176,7 @@ static int map_freeze(const union bpf_attr *attr)
 		err = -EBUSY;
 		goto err_put;
 	}
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!capable(CAP_BPF)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -1634,7 +1634,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
 	    (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_BPF))
 		return -EPERM;
 
 	/* copy eBPF program license from user space */
@@ -1647,11 +1647,11 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	is_gpl = license_is_gpl_compatible(license);
 
 	if (attr->insn_cnt == 0 ||
-	    attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
+	    attr->insn_cnt > (capable(CAP_BPF) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
 		return -E2BIG;
 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
 	    type != BPF_PROG_TYPE_CGROUP_SKB &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_BPF))
 		return -EPERM;
 
 	bpf_prog_load_fixup_attach_type(attr);
@@ -1802,6 +1802,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	char tp_name[128];
 	int tp_fd, err;
 
+	if (!cap_bpf_tracing())
+		return -EPERM;
+
 	if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
 			      sizeof(tp_name) - 1) < 0)
 		return -EFAULT;
@@ -2080,7 +2083,10 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
 	struct bpf_prog *prog;
 	int ret = -ENOTSUPP;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_NET_ADMIN) || !capable(CAP_BPF))
+		/* test_run callback is available for networking progs only.
+		 * Add cap_bpf_tracing() above when tracing progs become runable.
+		 */
 		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
 		return -EINVAL;
@@ -2117,7 +2123,7 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
 	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	next_id++;
@@ -2143,7 +2149,7 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	spin_lock_bh(&prog_idr_lock);
@@ -2177,7 +2183,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	f_flags = bpf_get_file_flag(attr->open_flags);
@@ -2352,7 +2358,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	info.run_time_ns = stats.nsecs;
 	info.run_cnt = stats.cnt;
 
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!capable(CAP_BPF)) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
 		info.nr_jited_ksyms = 0;
@@ -2670,7 +2676,7 @@ static int bpf_btf_load(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_BTF_LOAD))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	return btf_new_fd(attr);
@@ -2683,7 +2689,7 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	return btf_get_fd_by_id(attr->btf_id);
@@ -2752,7 +2758,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!cap_bpf_tracing())
 		return -EPERM;
 
 	if (attr->task_fd_query.flags != 0)
@@ -2820,7 +2826,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	union bpf_attr attr = {};
 	int err;
 
-	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
+	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_BPF))
 		return -EPERM;
 
 	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 10c0ff93f52b..5810e8cc9342 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -987,7 +987,7 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 	reg->umax_value = U64_MAX;
 
 	/* constant backtracking is enabled for root only for now */
-	reg->precise = capable(CAP_SYS_ADMIN) ? false : true;
+	reg->precise = capable(CAP_BPF) ? false : true;
 }
 
 /* Mark a register as having a completely unknown (scalar) value. */
@@ -9233,7 +9233,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
-	is_priv = capable(CAP_SYS_ADMIN);
+	is_priv = capable(CAP_BPF);
 
 	/* grab the mutex to protect few globals used by verifier */
 	if (!is_priv)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1255d14576..2bf58ff5bf75 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1246,7 +1246,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
 	u32 *ids, prog_cnt, ids_len;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!cap_bpf_tracing())
 		return -EPERM;
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -EINVAL;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index da5639a5bd3b..0b29f6abbeba 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -616,7 +616,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 	    !attr->btf_key_type_id || !attr->btf_value_type_id)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return -EPERM;
 
 	if (attr->value_size >= KMALLOC_MAX_SIZE -
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c1059cdad3d..986277abfde2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5990,7 +5990,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		break;
 	}
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_BPF))
 		return NULL;
 
 	switch (func_id) {
@@ -5999,7 +5999,9 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
-		return bpf_get_trace_printk_proto();
+		if (cap_bpf_tracing())
+			return bpf_get_trace_printk_proto();
+		/* fall through */
 	default:
 		return NULL;
 	}
@@ -6563,7 +6565,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 		return false;
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_end):
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_BPF))
 			return false;
 		break;
 	}
@@ -6575,7 +6577,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		case bpf_ctx_range(struct __sk_buff, tstamp):
-			if (!capable(CAP_SYS_ADMIN))
+			if (!capable(CAP_BPF))
 				return false;
 			break;
 		default:
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 44e2d640b088..91a7f25512ca 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -805,10 +805,20 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	}
 }
 
+struct libcap {
+	struct __user_cap_header_struct hdr;
+	struct __user_cap_data_struct data[2];
+};
+
 static int set_admin(bool admin)
 {
 	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	/* need CAP_BPF to load progs and CAP_NET_ADMIN to run networking progs,
+	 * and CAP_TRACING to create stackmap
+	 */
+	const cap_value_t cap_net_admin = CAP_NET_ADMIN;
+	const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
+	struct libcap *cap;
 	int ret = -1;
 
 	caps = cap_get_proc();
@@ -816,11 +826,26 @@ static int set_admin(bool admin)
 		perror("cap_get_proc");
 		return -1;
 	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+	cap = (struct libcap *)caps;
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
+		perror("cap_set_flag clear admin");
+		goto out;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
 				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
+		perror("cap_set_flag set_or_clear net");
 		goto out;
 	}
+	/* libcap is likely old and simply ignores CAP_BPF and CAP_TRACING,
+	 * so update effective bits manually
+	 */
+	if (admin) {
+		cap->data[1].effective |= 1 << (38 /* CAP_BPF */ - 32);
+		cap->data[1].effective |= 1 << (39 /* CAP_TRACING */ - 32);
+	} else {
+		cap->data[1].effective &= ~(1 << (38 - 32));
+		cap->data[1].effective &= ~(1 << (39 - 32));
+	}
 	if (cap_set_proc(caps)) {
 		perror("cap_set_proc");
 		goto out;
@@ -1012,9 +1037,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 static bool is_admin(void)
 {
+	cap_flag_value_t net_priv = CAP_CLEAR;
+	bool tracing_priv = false;
+	bool bpf_priv = false;
+	struct libcap *cap;
 	cap_t caps;
-	cap_flag_value_t sysadmin = CAP_CLEAR;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
 
 #ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
@@ -1027,11 +1054,14 @@ static bool is_admin(void)
 		perror("cap_get_proc");
 		return false;
 	}
-	if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
-		perror("cap_get_flag");
+	cap = (struct libcap *)caps;
+	bpf_priv = cap->data[1].effective & (1 << (38/* CAP_BPF */ - 32));
+	tracing_priv = cap->data[1].effective & (1 << (39/* CAP_TRACING */ - 32));
+	if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
+		perror("cap_get_flag NET");
 	if (cap_free(caps))
 		perror("cap_free");
-	return (sysadmin == CAP_SET);
+	return bpf_priv && tracing_priv && net_priv == CAP_SET;
 }
 
 static void get_unpriv_disabled()
-- 
2.20.0


^ permalink raw reply related

* [PATCH v2 bpf-next 3/3] perf: implement CAP_TRACING
From: Alexei Starovoitov @ 2019-08-29  5:12 UTC (permalink / raw)
  To: luto; +Cc: davem, peterz, rostedt, netdev, bpf, kernel-team, linux-api
In-Reply-To: <20190829051253.1927291-1-ast@kernel.org>

Implement permissions as stated in uapi/linux/capability.h

Similar to CAP_BPF it's highly unlikely that s/CAP_SYS_ADMIN/CAP_TRACING/
replacement will cause user breakage.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/powerpc/perf/core-book3s.c |  4 ++--
 arch/x86/events/intel/bts.c     |  2 +-
 arch/x86/events/intel/core.c    |  2 +-
 arch/x86/events/intel/p4.c      |  2 +-
 kernel/events/core.c            | 14 +++++++-------
 kernel/events/hw_breakpoint.c   |  2 +-
 kernel/trace/trace_event_perf.c |  4 ++--
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ca92e01d0bd1..ddb5f4b81023 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -204,7 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
 
-	if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+	if (perf_paranoid_kernel() && !capable(CAP_TRACING) &&
 		is_kernel_addr(mfspr(SPRN_SDAR)))
 		*addrp = 0;
 }
@@ -472,7 +472,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 			 * exporting it to userspace (avoid exposure of regions
 			 * where we could have speculative execution)
 			 */
-			if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+			if (perf_paranoid_kernel() && !capable(CAP_TRACING) &&
 				is_kernel_addr(addr))
 				continue;
 
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 5ee3fed881d3..848a104a0d33 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -550,7 +550,7 @@ static int bts_event_init(struct perf_event *event)
 	 * users to profile the kernel.
 	 */
 	if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable(CAP_TRACING))
 		return -EACCES;
 
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 648260b5f367..08714d33e566 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3307,7 +3307,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (x86_pmu.version < 3)
 		return -EINVAL;
 
-	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+	if (perf_paranoid_cpu() && !capable(CAP_TRACING))
 		return -EACCES;
 
 	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index dee579efb2b2..abf20e33b523 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -776,7 +776,7 @@ static int p4_validate_raw_event(struct perf_event *event)
 	 * the user needs special permissions to be able to use it
 	 */
 	if (p4_ht_active() && p4_event_bind_map[v].shared) {
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		if (perf_paranoid_cpu() && !capable(CAP_TRACING))
 			return -EACCES;
 	}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..a0e5495cad53 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4134,7 +4134,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 
 	if (!task) {
 		/* Must be root to operate on a CPU event: */
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		if (perf_paranoid_cpu() && !capable(CAP_TRACING))
 			return ERR_PTR(-EACCES);
 
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8741,7 +8741,7 @@ static int perf_kprobe_event_init(struct perf_event *event)
 	if (event->attr.type != perf_kprobe.type)
 		return -ENOENT;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_TRACING))
 		return -EACCES;
 
 	/*
@@ -8801,7 +8801,7 @@ static int perf_uprobe_event_init(struct perf_event *event)
 	if (event->attr.type != perf_uprobe.type)
 		return -ENOENT;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_TRACING))
 		return -EACCES;
 
 	/*
@@ -10588,7 +10588,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 		}
 		/* privileged levels capture (kernel, hv): check permissions */
 		if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
-		    && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+		    && perf_paranoid_kernel() && !capable(CAP_TRACING))
 			return -EACCES;
 	}
 
@@ -10807,12 +10807,12 @@ SYSCALL_DEFINE5(perf_event_open,
 		return err;
 
 	if (!attr.exclude_kernel) {
-		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+		if (perf_paranoid_kernel() && !capable(CAP_TRACING))
 			return -EACCES;
 	}
 
 	if (attr.namespaces) {
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_TRACING))
 			return -EACCES;
 	}
 
@@ -10826,7 +10826,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	/* Only privileged users can get physical addresses */
 	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
-	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+	    perf_paranoid_kernel() && !capable(CAP_TRACING))
 		return -EACCES;
 
 	/*
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index c5cd852fe86b..6174d547699d 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -404,7 +404,7 @@ static int hw_breakpoint_parse(struct perf_event *bp,
 		 * Don't let unprivileged users set a breakpoint in the trap
 		 * path to avoid trap recursion attacks.
 		 */
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable(CAP_TRACING))
 			return -EPERM;
 	}
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 0892e38ed6fb..1ec3e08d4539 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -46,7 +46,7 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
 
 	/* The ftrace function trace is allowed only for root. */
 	if (ftrace_event_is_function(tp_event)) {
-		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+		if (perf_paranoid_tracepoint_raw() && !capable(CAP_TRACING))
 			return -EPERM;
 
 		if (!is_sampling_event(p_event))
@@ -82,7 +82,7 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
 	 * ...otherwise raw tracepoint data can be a severe data leak,
 	 * only allow root to have these.
 	 */
-	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+	if (perf_paranoid_tracepoint_raw() && !capable(CAP_TRACING))
 		return -EPERM;
 
 	return 0;
-- 
2.20.0


^ permalink raw reply related

* [PATCH v2 bpf-next 1/3] capability: introduce CAP_BPF and CAP_TRACING
From: Alexei Starovoitov @ 2019-08-29  5:12 UTC (permalink / raw)
  To: luto; +Cc: davem, peterz, rostedt, netdev, bpf, kernel-team, linux-api

CAP_BPF allows the following BPF operations:
- Loading all types of BPF programs
- Creating all types of BPF maps except:
   - stackmap that needs CAP_TRACING
   - devmap that needs CAP_NET_ADMIN
   - cpumap that needs CAP_SYS_ADMIN
- Advanced verifier features
  - Indirect variable access
  - Bounded loops
  - BPF to BPF function calls
  - Scalar precision tracking
  - Larger complexity limits
  - Dead code elimination
  - And potentially other features
- Use of pointer-to-integer conversions in BPF programs
- Bypassing of speculation attack hardening measures
- Loading BPF Type Format (BTF) data
- Iterate system wide loaded programs, maps, BTF objects
- Retrieve xlated and JITed code of BPF programs
- Access maps and programs via id
- Use bpf_spin_lock() helper

CAP_BPF and CAP_TRACING together allow the following:
- bpf_probe_read to read arbitrary kernel memory
- bpf_trace_printk to print data to ftrace ring buffer
- Attach to raw_tracepoint
- Query association between kprobe/tracepoint and bpf program

CAP_BPF and CAP_NET_ADMIN together allow the following:
- Attach to cgroup-bpf hooks and query
- skb, xdp, flow_dissector test_run command

CAP_NET_ADMIN allows:
- Attach networking bpf programs to xdp, tc, lwt, flow dissector

CAP_TRACING allows:
- Full use of perf_event_open(), similarly to the effect of
  kernel.perf_event_paranoid == -1
- Full use of tracefs
- Creation of [ku][ret]probe
- Accessing arbitrary kernel memory via kprobe + probe_kernel_read
- Attach tracing bpf programs to perf events
- Access to kallsyms

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/capability.h     | 51 ++++++++++++++++++++++++++++-
 security/selinux/include/classmap.h |  4 +--
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index 240fdb9a60f6..664e07d12888 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -366,8 +366,57 @@ struct vfs_ns_cap_data {
 
 #define CAP_AUDIT_READ		37
 
+/*
+ * CAP_BPF allows the following BPF operations:
+ * - Loading all types of BPF programs
+ * - Creating all types of BPF maps except:
+ *    - stackmap that needs CAP_TRACING
+ *    - devmap that needs CAP_NET_ADMIN
+ *    - cpumap that needs CAP_SYS_ADMIN
+ * - Advanced verifier features
+ *   - Indirect variable access
+ *   - Bounded loops
+ *   - BPF to BPF function calls
+ *   - Scalar precision tracking
+ *   - Larger complexity limits
+ *   - Dead code elimination
+ *   - And potentially other features
+ * - Use of pointer-to-integer conversions in BPF programs
+ * - Bypassing of speculation attack hardening measures
+ * - Loading BPF Type Format (BTF) data
+ * - Iterate system wide loaded programs, maps, BTF objects
+ * - Retrieve xlated and JITed code of BPF programs
+ * - Access maps and programs via id
+ * - Use bpf_spin_lock() helper
+ *
+ * CAP_BPF and CAP_TRACING together allow the following:
+ * - bpf_probe_read to read arbitrary kernel memory
+ * - bpf_trace_printk to print data to ftrace ring buffer
+ * - Attach to raw_tracepoint
+ * - Query association between kprobe/tracepoint and bpf program
+ *
+ * CAP_BPF and CAP_NET_ADMIN together allow the following:
+ * - Attach to cgroup-bpf hooks and query
+ * - skb, xdp, flow_dissector test_run command
+ *
+ * CAP_NET_ADMIN allows:
+ * - Attach networking bpf programs to xdp, tc, lwt, flow dissector
+ */
+#define CAP_BPF			38
+
+/*
+ * CAP_TRACING allows:
+ * - Full use of perf_event_open(), similarly to the effect of
+ *   kernel.perf_event_paranoid == -1
+ * - Full use of tracefs
+ * - Creation of [ku][ret]probe
+ * - Accessing arbitrary kernel memory via kprobe + probe_kernel_read
+ * - Attach tracing bpf programs to perf events
+ * - Access to kallsyms
+ */
+#define CAP_TRACING		39
 
-#define CAP_LAST_CAP         CAP_AUDIT_READ
+#define CAP_LAST_CAP         CAP_TRACING
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 201f7e588a29..0b364e245163 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -26,9 +26,9 @@
 	    "audit_control", "setfcap"
 
 #define COMMON_CAP2_PERMS  "mac_override", "mac_admin", "syslog", \
-		"wake_alarm", "block_suspend", "audit_read"
+		"wake_alarm", "block_suspend", "audit_read", "bpf", "tracing"
 
-#if CAP_LAST_CAP > CAP_AUDIT_READ
+#if CAP_LAST_CAP > CAP_TRACING
 #error New capability defined, please update COMMON_CAP2_PERMS.
 #endif
 
-- 
2.20.0


^ permalink raw reply related

* Re: [PATCH net-next] net: dsa: mv88e6xxx: fix freeing unused SERDES IRQ
From: Marek Behun @ 2019-08-29  4:51 UTC (permalink / raw)
  To: Vivien Didelot; +Cc: netdev, davem, f.fainelli, andrew
In-Reply-To: <20190828185511.21956-1-vivien.didelot@gmail.com>

On Wed, 28 Aug 2019 14:55:11 -0400
Vivien Didelot <vivien.didelot@gmail.com> wrote:

> Now mv88e6xxx does not enable its ports at setup itself and let
> the DSA core handle this, unused ports are disabled without being
> powered on first. While that is expected, the SERDES powering code
> was assuming that a port was already set up before powering it down,
> resulting in freeing an unused IRQ. The patch fixes this assumption.
> 
> Fixes: b759f528ca3d ("net: dsa: mv88e6xxx: enable SERDES after setup")
> Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
> ---
>  drivers/net/dsa/mv88e6xxx/chip.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
> index 6525075f6bd3..c648f9fbfa59 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -2070,7 +2070,8 @@ static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
>  		if (chip->info->ops->serdes_irq_setup)
>  			err = chip->info->ops->serdes_irq_setup(chip, port);
>  	} else {
> -		if (chip->info->ops->serdes_irq_free)
> +		if (chip->info->ops->serdes_irq_free &&
> +		    chip->ports[port].serdes_irq)
>  			chip->info->ops->serdes_irq_free(chip, port);
>  
>  		err = chip->info->ops->serdes_power(chip, port, false);

Reviewed-by: Marek Behún <marek.behun@nic.cz>

^ permalink raw reply

* Re: [patch net-next rfc 3/7] net: rtnetlink: add commands to add and delete alternative ifnames
From: Roopa Prabhu @ 2019-08-29  4:36 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, Jakub Kicinski, David Ahern, netdev,
	Stephen Hemminger, dcbw, Michal Kubecek, Andrew Lunn, parav,
	Saeed Mahameed, mlxsw
In-Reply-To: <20190828070711.GE2312@nanopsycho>

On Wed, Aug 28, 2019 at 12:07 AM Jiri Pirko <jiri@resnulli.us> wrote:
>
> Tue, Aug 27, 2019 at 05:14:49PM CEST, roopa@cumulusnetworks.com wrote:
> >On Tue, Aug 27, 2019 at 2:35 AM Jiri Pirko <jiri@resnulli.us> wrote:
> >>
> >> Tue, Aug 27, 2019 at 10:22:42AM CEST, davem@davemloft.net wrote:
> >> >From: Jiri Pirko <jiri@resnulli.us>
> >> >Date: Tue, 27 Aug 2019 09:08:08 +0200
> >> >
> >> >> Okay, so if I understand correctly, on top of separate commands for
> >> >> add/del of alternative names, you suggest also get/dump to be separate
> >> >> command and don't fill this up in existing newlink/getlink command.
> >> >
> >> >I'm not sure what to do yet.
> >> >
> >> >David has a point, because the only way these ifnames are useful is
> >> >as ways to specify and choose net devices.  So based upon that I'm
> >> >slightly leaning towards not using separate commands.
> >>
> >> Well yeah, one can use it to handle existing commands instead of
> >> IFLA_NAME.
> >>
> >> But why does it rule out separate commands? I think it is cleaner than
> >> to put everything in poor setlink messages :/ The fact that we would
> >> need to add "OP" to the setlink message just feels off. Other similar
> >> needs may show up in the future and we may end up with ridiculous messages
> >> like:
> >>
> >> SETLINK
> >>   IFLA_NAME eth0
> >>   IFLA_ALTNAME_LIST (nest)
> >>       IFLA_ALTNAME_OP add
> >>       IFLA_ALTNAME somereallylongname
> >>       IFLA_ALTNAME_OP del
> >>       IFLA_ALTNAME somereallyreallylongname
> >>       IFLA_ALTNAME_OP add
> >>       IFLA_ALTNAME someotherreallylongname
> >>   IFLA_SOMETHING_ELSE_LIST (nest)
> >>       IFLA_SOMETHING_ELSE_OP add
> >>       ...
> >>       IFLA_SOMETHING_ELSE_OP del
> >>       ...
> >>       IFLA_SOMETHING_ELSE_OP add
> >>       ...
> >>
> >> I don't know what to think about it. Rollbacks are going to be pure hell :/
> >
> >I don't see a huge problem with the above. We need a way to solve this
> >anyways for other list types in the future correct ?.
> >The approach taken by this series will not scale if we have to add a
> >new msg type and header for every such list attribute in the future.
>
> Do you have some other examples in mind? So far, this was not needed.

yes, so far it was not needed.
No other future possible examples in mind...but I won't be surprised if
we see such cases in the future.
Having a consistent API to extend a list attribute will help.

>
>
> >
> >A good parallel here is bridge vlan which uses RTM_SETLINK and
> >RTM_DELLINK for vlan add and deletes. But it does have an advantage of
> >a separate
> >msg space under AF_BRIDGE which makes it cleaner. Maybe something
> >closer to that  can be made to work (possibly with a msg flag) ?.
>
> 1) Not sure if AF_BRIDGE is the right example how to do things
> 2) See br_vlan_info(). It is not an OP-PER-VLAN. You either add or
> delete all passed info, depending on the cmd (RTM_SETLINK/RTM_DELLINK).

yes,  correct. I mentioned that because I was wondering if we can
think along the same lines for this API.
eg
(a) RTM_NEWLINK always replaces the list attribute
(b) RTM_SETLINK with NLM_F_APPEND always appends to the list attribute
(c) RTM_DELLINK with NLM_F_APPEND updates the list attribute

(It could be NLM_F_UPDATE if NLM_F_APPEND sounds weird in the del
case. I have not looked at the full dellink path if it will work
neatly... it's been a busy day)


>
>
> >
> >Would be good to have a consistent way to update list attributes for
> >future needs too.
>
> Okay. Do you suggest to have new set of commands to handle
> adding/deleting lists of items? altNames now, others (other nests) later?
>
> Something like:
>
> CMD SETLISTS
>      IFLA_NAME eth0
>      IFLA_ALTNAME_LIST (nest)
>        IFLA_ALTNAME somereallylongname
>        IFLA_ALTNAME somereallyreallylongname
>        IFLA_ALTNAME someotherreallylongname
>      IFLA_SOMETHING_ELSE_LIST (nest)
>        IFLA_SOMETHING_ELSE
>        IFLA_SOMETHING_ELSE
>        IFLA_SOMETHING_ELSE
>
>
> CMD DELLISTS
>      IFLA_NAME eth0
>      IFLA_ALTNAME_LIST (nest)
>        IFLA_ALTNAME somereallylongname
>        IFLA_ALTNAME somereallyreallylongname
>        IFLA_ALTNAME someotherreallylongname
>      IFLA_SOMETHING_ELSE_LIST (nest)
>        IFLA_SOMETHING_ELSE
>        IFLA_SOMETHING_ELSE
>        IFLA_SOMETHING_ELSE
>
> How does this sound?

This seems fine but it does introduce a new type. If we can avoid a
new msg type with a flag that would be nice (like the NLM_F_APPEND eg
above).
The reason for that is to see if we can use it elsewhere too (eg some
random future list attribute in the route subsystem. If a flag works
then we don't have to add a RTM_NEWROUTE variant of CMD SETLISTS and
CMD DELLISTS)

^ permalink raw reply

* Re: [PATCH bpf-next] bpf, capabilities: introduce CAP_BPF
From: Alexei Starovoitov @ 2019-08-29  4:07 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Alexei Starovoitov, Kees Cook, LSM List,
	James Morris, Jann Horn, Peter Zijlstra, Masami Hiramatsu,
	Steven Rostedt, David S. Miller, Daniel Borkmann,
	Network Development, bpf, kernel-team, Linux API
In-Reply-To: <DA52992F-4862-4945-8482-FE619A04C753@amacapital.net>

On Wed, Aug 28, 2019 at 05:45:47PM -0700, Andy Lutomirski wrote:
> > 
> >> It seems like you are specifically trying to add a new switch to turn
> >> as much of BPF as possible on and off.  Why?
> > 
> > Didn't I explain it several times already with multiple examples
> > from systemd, daemons, bpftrace ?
> > 
> > Let's try again.
> > Take your laptop with linux distro.
> > You're the only user there. I'm assuming you're not sharing it with
> > partner and kids. This is my definition of 'single user system'.
> > You can sudo on it at any time, but obviously prefer to run as many
> > apps as possible without cap_sys_admin.
> > Now you found some awesome open source app on the web that monitors
> > the health of the kernel and will pop a nice message on a screen if
> > something is wrong. Currently this app needs root. You hesitate,
> > but the apps is so useful and it has strong upstream code review process
> > that you keep running it 24/7.
> > This is open source app. New versions come. You upgrade.
> > You have enough trust in that app that you keep running it as root.
> > But there is always a chance of a new version accidentally doing
> > something stupid such as 'kill -9 -1'. It's an open source app in the end.
> > 
> > Now I come with this CAP* proposal to make this app safer.
> > I'm not making your system more secure and not making this app
> > more secure. I can only make your laptop safer for day to day work
> > by limiting the operations this app can do.
> > This particular app monitors the kernel via bpf and tracing.
> > Hence you can give it CAP_TRACING and CAP_BPF and drop the rest.
> 
> This won’t make me much more comfortable, since CAP_BPF lets it do an ever-growing set of nasty things. I’d much rather one or both of two things happen:
> 
> 1. Give it CAP_TRACING only. It can leak my data, but it’s rather hard for it to crash my laptop, lose data, or cause other shenanigans.
> 
> 2. Improve it a bit so all the privileged ops are wrapped by capset().
> 
> Does this make sense?  I’m a security person on occasion. I find vulnerabilities and exploit them deliberately and I break things by accident on a regular basis. In my considered opinion, CAP_TRACING alone, even extended to cover part of BPF as I’ve described, is decently safe. Getting root with just CAP_TRACING will be decently challenging, especially if I don’t get to read things like sshd’s memory, and improvements to mitigate even that could be added.  I am quite confident that attacks starting with CAP_TRACING will have clear audit signatures if auditing is on.  I am also confident that CAP_BPF *will* allow DoS and likely privilege escalation, and this will only get more likely as BPF gets more widely used. And, if BPF-based auditing ever becomes a thing, writing to the audit daemon’s maps will be a great way to cover one’s tracks.

CAP_TRACING, as I'm proposing it, will allow full tracefs access.
I think Steven and Masami prefer that as well.
That includes kprobe with probe_kernel_read.
That also means mini-DoS by installing kprobes everywhere or running too much ftrace.

CAP_TRACING will allow perf_event_open() too.
Which also means mini-DoS with too many events.

CAP_TRACING with or without CAP_BPF is safe, but it's not secure.
And that's what I need to make the above 'open source kernel health app' safe.

In real world we have tens of such apps and they use all of the things that
I'm allowing via CAP_BPF + CAP_NET_ADMIN + CAP_TRACING.
Some apps will need only two out of three.
I don't see any further possibility to shrink the scope of the proposal.

> I’m trying to convince you that bpf’s security model can be made better
> than what you’re proposing. I’m genuinely not trying to get in your way.
> I’m trying to help you improve bpf.

If you really want to help please don't reject the real use cases
just because they don't fit into your proposal.

There is not a single feature in BPF land that we did because we simply
wanted to. For every feature we drilled into use cases to make sure
there is a real user behind it.
Same thing with CAP_BPF. I'm defining it to include GET_FD_BY_ID because
apps use it and they need to be made safer.

Anyway the v2 version of the patch with CAP_TRACING and CAP_BPF is on the way.
Hopefully later tonight or tomorrow.


^ permalink raw reply

* BUG: corrupted list in p9_fd_cancelled (2)
From: syzbot @ 2019-08-29  3:58 UTC (permalink / raw)
  To: asmadeus, davem, ericvh, linux-kernel, lucho, netdev,
	syzkaller-bugs, v9fs-developer

Hello,

syzbot found the following crash on:

HEAD commit:    36146921 Merge tag 'hyperv-fixes-signed' of git://git.kern..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=169f691e600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=6919752cc1b760b4
dashboard link: https://syzkaller.appspot.com/bug?extid=1d26c4ed77bc6c5ed5e6
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=14d03ba6600000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+1d26c4ed77bc6c5ed5e6@syzkaller.appspotmail.com

list_del corruption, ffff88808ecdbfb0->next is LIST_POISON1  
(dead000000000100)
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:45!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 20174 Comm: syz-executor.1 Not tainted 5.3.0-rc5+ #125
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
RIP: 0010:__list_del_entry_valid.cold+0x23/0x4f lib/list_debug.c:45
Code: e8 d5 06 1e fe 0f 0b 4c 89 f6 48 c7 c7 e0 26 c6 87 e8 c4 06 1e fe 0f  
0b 4c 89 ea 4c 89 f6 48 c7 c7 20 26 c6 87 e8 b0 06 1e fe <0f> 0b 4c 89 e2  
4c 89 f6 48 c7 c7 80 26 c6 87 e8 9c 06 1e fe 0f 0b
RSP: 0018:ffff8880994076d8 EFLAGS: 00010286
RAX: 000000000000004e RBX: 1ffff11013280ee9 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffffffff815c2526 RDI: ffffed1013280ecd
RBP: ffff8880994076f0 R08: 000000000000004e R09: ffffed1015d060d1
R10: ffffed1015d060d0 R11: ffff8880ae830687 R12: dead000000000122
R13: dead000000000100 R14: ffff88808ecdbfb0 R15: ffff88808ecdbfb8
FS:  00007fb2aca54700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffee6574f58 CR3: 00000000a8e6d000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  __list_del_entry include/linux/list.h:131 [inline]
  list_del include/linux/list.h:139 [inline]
  p9_fd_cancelled+0x3c/0x1c0 net/9p/trans_fd.c:710
  p9_client_flush+0x1b7/0x1f0 net/9p/client.c:674
  p9_client_rpc+0x112f/0x12a0 net/9p/client.c:781
  p9_client_version net/9p/client.c:952 [inline]
  p9_client_create+0xb7f/0x1430 net/9p/client.c:1052
  v9fs_session_init+0x1e7/0x18c0 fs/9p/v9fs.c:406
  v9fs_mount+0x7d/0x920 fs/9p/vfs_super.c:120
  legacy_get_tree+0x108/0x220 fs/fs_context.c:661
  vfs_get_tree+0x8e/0x390 fs/super.c:1413
  do_new_mount fs/namespace.c:2791 [inline]
  do_mount+0x13b3/0x1c30 fs/namespace.c:3111
  ksys_mount+0xdb/0x150 fs/namespace.c:3320
  __do_sys_mount fs/namespace.c:3334 [inline]
  __se_sys_mount fs/namespace.c:3331 [inline]
  __x64_sys_mount+0xbe/0x150 fs/namespace.c:3331
  do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x459879
Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fb2aca53c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000459879
RDX: 00000000200002c0 RSI: 0000000020000040 RDI: 0000000000000000
RBP: 000000000075bfc8 R08: 0000000020000400 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb2aca546d4
R13: 00000000004c5e2f R14: 00000000004da930 R15: 00000000ffffffff
Modules linked in:
---[ end trace c76f5f29f0af3347 ]---
RIP: 0010:__list_del_entry_valid.cold+0x23/0x4f lib/list_debug.c:45
Code: e8 d5 06 1e fe 0f 0b 4c 89 f6 48 c7 c7 e0 26 c6 87 e8 c4 06 1e fe 0f  
0b 4c 89 ea 4c 89 f6 48 c7 c7 20 26 c6 87 e8 b0 06 1e fe <0f> 0b 4c 89 e2  
4c 89 f6 48 c7 c7 80 26 c6 87 e8 9c 06 1e fe 0f 0b
RSP: 0018:ffff8880994076d8 EFLAGS: 00010286
RAX: 000000000000004e RBX: 1ffff11013280ee9 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffffffff815c2526 RDI: ffffed1013280ecd
RBP: ffff8880994076f0 R08: 000000000000004e R09: ffffed1015d060d1
R10: ffffed1015d060d0 R11: ffff8880ae830687 R12: dead000000000122
R13: dead000000000100 R14: ffff88808ecdbfb0 R15: ffff88808ecdbfb8
FS:  00007fb2aca54700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffee6574f58 CR3: 00000000a8e6d000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* [PATCH netdev] net: stmmac: dwmac-rk: Don't fail if phy regulator is absent
From: Chen-Yu Tsai @ 2019-08-29  3:17 UTC (permalink / raw)
  To: Giuseppe Cavallaro, Alexandre Torgue, Jose Abreu, David S. Miller,
	Heiko Stuebner
  Cc: Chen-Yu Tsai, linux-rockchip, linux-arm-kernel, netdev,
	linux-kernel

From: Chen-Yu Tsai <wens@csie.org>

The devicetree binding lists the phy regulator as optional. As such, the
driver should not bail out if it can't find a regulator. Instead it
should just skip the remaining regulator related code and continue
on normally.

Skip the remainder of phy_power_on() if a regulator supply isn't
available. This also gets rid of the bogus return code.

Fixes: 2e12f536635f ("net: stmmac: dwmac-rk: Use standard devicetree property for phy regulator")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---

On a separate note, maybe we should add this file to the Rockchip
entry in MAINTAINERS?

---
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 4644b2aeeba1..e2e469c37a4d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1194,10 +1194,8 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
 	int ret;
 	struct device *dev = &bsp_priv->pdev->dev;
 
-	if (!ldo) {
-		dev_err(dev, "no regulator found\n");
-		return -1;
-	}
+	if (!ldo)
+		return 0;
 
 	if (enable) {
 		ret = regulator_enable(ldo);
-- 
2.20.1


^ permalink raw reply related

* [PATCH] amd-xgbe: Fix error path in xgbe_mod_init()
From: YueHaibing @ 2019-08-29  2:46 UTC (permalink / raw)
  To: thomas.lendacky, davem; +Cc: netdev, linux-kernel, YueHaibing

In xgbe_mod_init(), we should do cleanup if some error occurs

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: efbaa828330a ("amd-xgbe: Add support to handle device renaming")
Fixes: 47f164deab22 ("amd-xgbe: Add PCI device support")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-main.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index b41f236..7ce9c69 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -469,13 +469,19 @@ static int __init xgbe_mod_init(void)
 
 	ret = xgbe_platform_init();
 	if (ret)
-		return ret;
+		goto err_platform_init;
 
 	ret = xgbe_pci_init();
 	if (ret)
-		return ret;
+		goto err_pci_init;
 
 	return 0;
+
+err_pci_init:
+	xgbe_platform_exit();
+err_platform_init:
+	unregister_netdevice_notifier(&xgbe_netdev_notifier);
+	return ret;
 }
 
 static void __exit xgbe_mod_exit(void)
-- 
1.8.3.1



^ permalink raw reply related

* Re: [PATCH v2] riscv: add support for SECCOMP and SECCOMP_FILTER
From: Paul Walmsley @ 2019-08-29  1:30 UTC (permalink / raw)
  To: Kees Cook, Tycho Andersen
  Cc: David Abdurachmanov, Palmer Dabbelt, Albert Ou, Oleg Nesterov,
	Andy Lutomirski, Will Drewry, Shuah Khan, Alexei Starovoitov,
	Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
	David Abdurachmanov, Thomas Gleixner, Allison Randal,
	Alexios Zavras, Anup Patel, Vincent Chen, Alan Kao, linux-riscv,
	linux-kernel, linux-kselftest, netdev, bpf, me
In-Reply-To: <201908261043.08510F5E66@keescook>

Hi Kees,

On Mon, 26 Aug 2019, Kees Cook wrote:

> On Mon, Aug 26, 2019 at 09:39:50AM -0700, David Abdurachmanov wrote:
> > I don't have the a build with SECCOMP for the board right now, so it
> > will have to wait. I just finished a new kernel (almost rc6) for Fedora,
> 
> FWIW, I don't think this should block landing the code: all the tests
> fail without seccomp support. ;) So this patch is an improvement!

Am sympathetic to this -- we did it with the hugetlb patches for RISC-V -- 
but it would be good to understand a little bit more about why the test 
fails before we merge it.

Once we merge the patch, it will probably reduce the motivation for others 
to either understand and fix the underlying problem with the RISC-V code 
-- or, if it truly is a flaky test, to drop (or fix) the test in the 
seccomp_bpf kselftests.

Thanks for helping to take a closer look at this,

- Paul

^ permalink raw reply

* Re: [PATCH net-next 0/4] mlxsw: Various updates
From: David Miller @ 2019-08-29  1:24 UTC (permalink / raw)
  To: idosch; +Cc: netdev, jiri, mlxsw, idosch
In-Reply-To: <20190828155437.9852-1-idosch@idosch.org>

From: Ido Schimmel <idosch@idosch.org>
Date: Wed, 28 Aug 2019 18:54:33 +0300

> From: Ido Schimmel <idosch@mellanox.com>
> 
> Patch #1 from Amit removes 56G speed support. The reasons for this are
> detailed in the commit message.
> 
> Patch #2 from Shalom ensures that the hardware does not auto negotiate
> the number of used lanes. For example, if a four lane port supports 100G
> over both two and four lanes, it will not advertise the two lane link
> mode.
> 
> Patch #3 bumps the firmware version supported by the driver.
> 
> Patch #4 from Petr adds ethtool counters to help debug the internal PTP
> implementation in mlxsw. I copied Richard on this patch in case he has
> comments.

Series applied.

^ permalink raw reply

* RE: [PATCH net v4 0/2] r8152: fix side effect
From: Hayes Wang @ 2019-08-29  1:08 UTC (permalink / raw)
  To: David Miller
  Cc: netdev@vger.kernel.org, nic_swsd, linux-kernel@vger.kernel.org
In-Reply-To: <20190828.161735.1528060932193718727.davem@davemloft.net>

David Miller [mailto:davem@davemloft.net]
> Sent: Thursday, August 29, 2019 7:18 AM
[...]
> > v4:
> > Add Fixes tag for both patch #1 and #2.
> 
> I applied v3, sorry.
> 
> I think it is OK as I will backport things to v5.2 -stable anyways.

Thanks.

Best Regards,
Hayes



^ permalink raw reply

* Re: [PATCH bpf-next] bpf, capabilities: introduce CAP_BPF
From: Andy Lutomirski @ 2019-08-29  0:58 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andy Lutomirski, Alexei Starovoitov, Kees Cook, LSM List,
	James Morris, Jann Horn, Peter Zijlstra, Masami Hiramatsu,
	Steven Rostedt, David S. Miller, Daniel Borkmann,
	Network Development, bpf, kernel-team, Linux API
In-Reply-To: <20190828233828.p7xddyw3fjzfinm6@ast-mbp.dhcp.thefacebook.com>



> On Aug 28, 2019, at 4:38 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
>> On Tue, Aug 27, 2019 at 11:20:19PM -0700, Andy Lutomirski wrote:
>> On Tue, Aug 27, 2019 at 9:49 PM Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>>> 
>>>> On Tue, Aug 27, 2019 at 07:00:40PM -0700, Andy Lutomirski wrote:
>>>> 
>>>> Let me put this a bit differently. Part of the point is that
>>>> CAP_TRACING should allow a user or program to trace without being able
>>>> to corrupt the system. CAP_BPF as you’ve proposed it *can* likely
>>>> crash the system.
>>> 
>>> Really? I'm still waiting for your example where bpf+kprobe crashes the system...
>>> 
>> 
>> That's not what I meant.  bpf+kprobe causing a crash is a bug.  I'm
>> referring to a totally different issue.  On my laptop:
>> 
>> $ sudo bpftool map
>> 48: hash  name foobar  flags 0x0
>>    key 8B  value 8B  max_entries 64  memlock 8192B
>> 181: lpm_trie  flags 0x1
>>    key 8B  value 8B  max_entries 1  memlock 4096B
>> 182: lpm_trie  flags 0x1
>>    key 20B  value 8B  max_entries 1  memlock 4096B
>> 183: lpm_trie  flags 0x1
>>    key 8B  value 8B  max_entries 1  memlock 4096B
>> 184: lpm_trie  flags 0x1
>>    key 20B  value 8B  max_entries 1  memlock 4096B
>> 185: lpm_trie  flags 0x1
>>    key 8B  value 8B  max_entries 1  memlock 4096B
>> 186: lpm_trie  flags 0x1
>>    key 20B  value 8B  max_entries 1  memlock 4096B
>> 187: lpm_trie  flags 0x1
>>    key 8B  value 8B  max_entries 1  memlock 4096B
>> 188: lpm_trie  flags 0x1
>>    key 20B  value 8B  max_entries 1  memlock 4096B
>> 
>> $ sudo bpftool map dump id 186
>> key:
>> 00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
>> 00 00 00 00
>> value:
>> 02 00 00 00 00 00 00 00
>> Found 1 element
>> 
>> $ sudo bpftool map delete id 186 key hex 00 00 00 00 00 00 00 00 00 00
>> 00 00 00 00 00 00 00 00 00 00
>> [this worked]
>> 
>> I don't know what my laptop was doing with map id 186 in particular,
>> but, whatever it was, I definitely broke it.  If a BPF firewall is in
>> use on something important enough, this could easily remove
>> connectivity from part or all of the system.  Right now, this needs
>> CAP_SYS_ADMIN.  With your patch, CAP_BPF is sufficient to do this, but
>> you *also* need CAP_BPF to trace the system using BPF.  Tracing with
>> BPF is 'safe' in the absence of bugs.  Modifying other peoples' maps
>> is not.
> 
> That lpm_trie is likely systemd implementing IP sandboxing.
> Not sure whether it's white or black list.
> Deleting an IP address from that map will either allow or disallow
> network traffic.
> Out of band operation on bpf map broke some bpf program. Sure.
> But calling it 'breaking the system' is quite a stretch.
> Calling it 'crashing the system' is plain wrong.
> Yet you're generalizing this bpf map read/write as
> "CAP_BPF as you’ve proposed it *can* likely crash the system."
> This is what I have a problem with.

Well, after I sent that email, firewalld on my laptop exploded and the system eventually hung.  I call that broken, and I really made a minimal effort here to break things.

> 
> Anyway, changing gears...
> Yes. I did propose to make a task with CAP_BPF to be able to
> manipulate arbitrary maps in the system.
> You could have said that if CAP_BPF is given to 'bpftool'
> then any user will be able to mess with other maps because
> bpftool is likely chmod-ed 755.
> Absolutely correct!
> It's not a fault of the CAP_BPF scope.
> Just don't give that cap to bpftool or do different acl/chmod.

I see no reason that allowing a user to use most of bpftool’s functionality necessarily needs to allow that user to corrupt the system. It obviously will expand the attack surface available to that user, but that should be it.

I’m trying to convince you that bpf’s security model can be made better than what you’re proposing. I’m genuinely not trying to get in your way. I’m trying to help you improve bpf.

^ permalink raw reply

* Re: [PATCH bpf-next] bpf, capabilities: introduce CAP_BPF
From: Andy Lutomirski @ 2019-08-29  0:53 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andy Lutomirski, Alexei Starovoitov, Kees Cook, LSM List,
	James Morris, Jann Horn, Peter Zijlstra, Masami Hiramatsu,
	Steven Rostedt, David S. Miller, Daniel Borkmann,
	Network Development, bpf, kernel-team, Linux API
In-Reply-To: <DA52992F-4862-4945-8482-FE619A04C753@amacapital.net>



> On Aug 28, 2019, at 5:45 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> 
> 
>>> On Aug 28, 2019, at 3:55 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>>> 
>>> On Tue, Aug 27, 2019 at 11:12:29PM -0700, Andy Lutomirski wrote:
>>>>> 
>>>>> 
>>>>> From the previous discussion, you want to make progress toward solving
>>>>> a lot of problems with CAP_BPF.  One of them was making BPF
>>>>> firewalling more generally useful. By making CAP_BPF grant the ability
>>>>> to read kernel memory, you will make administrators much more nervous
>>>>> to grant CAP_BPF.
>>>> 
>>>> Andy, was your email hacked?
>>>> I explained several times that in this proposal
>>>> CAP_BPF _and_ CAP_TRACING _both_ are necessary to read kernel memory.
>>>> CAP_BPF alone is _not enough_.
>>> 
>>> You have indeed said this many times.  You've stated it as a matter of
>>> fact as though it cannot possibly be discussed.  I'm asking you to
>>> justify it.
>> 
>> That's not how I see it.
>> I kept stating that both CAP_BPF and CAP_TRACING are necessary to read
>> kernel memory whereas you kept distorting my statement by dropping second
>> part and then making claims that "CAP_BPF grant the ability to read
>> kernel memory, you will make administrators much more nervous".
> 
> Mea culpa. CAP_BPF does, however, appear to permit breaking kASLR due to unsafe pointer conversions, and it allows reading and writing everyone’s maps.  I stand by my overall point.
> 
>> 
>> Just s/CAP_BPF/CAP_BPF and CAP_TRACING/ in this above sentence.
>> See that meaning suddenly changes?
>> Now administrators would be worried about tasks that have both at once.
>> They also would be worried about tasks that have CAP_TRACING alone,
>> because that's what allows probe_kernel_read().
> 
> This is not at all what I meant. Of course granting CAP_BPF+CAP_TRACING allows reading kernel memory. This is not at all a problem.  Here is a problem I see:
> 
> CAP_TRACING + CAP_BPF allows modification of other people’s maps and potentially other things that should not be implied by CAP_TRACING alone and that don’t need to be available to tracers. So CAP_TRACING, which is powerful but has somewhat limited scope, isn’t fully useful without CAP_BPF, and giving CAP_TRACING *and* CAP_BPF allows things that tracers shouldn’t be able to do. I think this would make the whole mechanism less useful to Android, for example.
> 
> (Also, I’m not sure quite what you mean by “CAP_TRACING ... allows probe_kernel_read()”. probe_kernel_read() is a kernel function that can’t be directly called by userspace. CAP_TRACING allows reading kernel memory in plenty of ways regardless.)
> 
>> 
>>> It seems like you are specifically trying to add a new switch to turn
>>> as much of BPF as possible on and off.  Why?
>> 
>> Didn't I explain it several times already with multiple examples
>> from systemd, daemons, bpftrace ?
>> 
>> Let's try again.
>> Take your laptop with linux distro.
>> You're the only user there. I'm assuming you're not sharing it with
>> partner and kids. This is my definition of 'single user system'.
>> You can sudo on it at any time, but obviously prefer to run as many
>> apps as possible without cap_sys_admin.
>> Now you found some awesome open source app on the web that monitors
>> the health of the kernel and will pop a nice message on a screen if
>> something is wrong. Currently this app needs root. You hesitate,
>> but the apps is so useful and it has strong upstream code review process
>> that you keep running it 24/7.
>> This is open source app. New versions come. You upgrade.
>> You have enough trust in that app that you keep running it as root.
>> But there is always a chance of a new version accidentally doing
>> something stupid such as 'kill -9 -1'. It's an open source app in the end.
>> 
>> Now I come with this CAP* proposal to make this app safer.
>> I'm not making your system more secure and not making this app
>> more secure. I can only make your laptop safer for day to day work
>> by limiting the operations this app can do.
>> This particular app monitors the kernel via bpf and tracing.
>> Hence you can give it CAP_TRACING and CAP_BPF and drop the rest.
> 
> This won’t make me much more comfortable, since CAP_BPF lets it do an ever-growing set of nasty things. I’d much rather one or both of two things happen:
> 
> 1. Give it CAP_TRACING only. It can leak my data, but it’s rather hard for it to crash my laptop, lose data, or cause other shenanigans.
> 
> 2. Improve it a bit so all the privileged ops are wrapped by capset().
> 
> Does this make sense?  I’m a security person on occasion. I find vulnerabilities and exploit them deliberately and I break things by accident on a regular basis. In my considered opinion, CAP_TRACING alone, even extended to cover part of BPF as I’ve described, is decently safe. Getting root with just CAP_TRACING will be decently challenging, especially if I don’t get to read things like sshd’s memory, and improvements to mitigate even that could be added.  I am quite confident that attacks starting with CAP_TRACING will have clear audit signatures if auditing is on.  I am also confident that CAP_BPF *will* allow DoS and likely privilege escalation, and this will only get more likely as BPF gets more widely used. And, if BPF-based auditing ever becomes a thing, writing to the audit daemon’s maps will be a great way to cover one’s tracks.
> 
> 
>> I think they have no choice but to do kernel.unprivileged_bpf_disabled=1.
>> We, as a kernel community, are forcing the users into it.
>> Hence I really do not see a value in any proposal today that expands
>> unprivileged bpf usage.
> 
> I think you’re overemphasizing bpf’s role in the whole speculation mess. I realize that you’ve spent an insane amount of time on mitigations to stupid issues. I’ve spent a less insane amount of time on mitigating similar issues outside of bpf.  It’s a mess.  At the end of the day, the kernel does its best, and new bugs show up. New CPUs will be less buggy.

Bah, accidentally hit send.

If the kernel’s mitigations aren’t good enough or you’re subject to direct user attack (e.g. via insufficient IBPB, SMT attack, etc) then you’re vulnerable. Otherwise you’re less vulnerable. BPF is by no means the whole story. Heck, the kernel *could*, at unfortunate performance cost, more aggressively flush around BPF and effectively treat it like user code.

So I think we should design bpf’s API’s security with the philosophy that speculation attacks are just one more type of bug, and we should make sure that real-world useful configurations don’t give BPF to tasks that don’t need it. My unpriv proposal tries to do this. This is *why* my proposal keeps test_run locked down and restricts running each program type to tasks that are explicitly granted the ability to attach it.

So let’s let CAP_TRACING use bpf. Speculation attacks are mostly irrelevant to tracers anyway, but all the rest of the stuff I’ve been talking about is relevant.

^ permalink raw reply

* Re: [PATCH bpf-next] bpf, capabilities: introduce CAP_BPF
From: Andy Lutomirski @ 2019-08-29  0:45 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andy Lutomirski, Alexei Starovoitov, Kees Cook, LSM List,
	James Morris, Jann Horn, Peter Zijlstra, Masami Hiramatsu,
	Steven Rostedt, David S. Miller, Daniel Borkmann,
	Network Development, bpf, kernel-team, Linux API
In-Reply-To: <20190828225512.q6qbvkdiqih2iewk@ast-mbp.dhcp.thefacebook.com>


> On Aug 28, 2019, at 3:55 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
>> On Tue, Aug 27, 2019 at 11:12:29PM -0700, Andy Lutomirski wrote:
>>>> 
>>>> 
>>>> From the previous discussion, you want to make progress toward solving
>>>> a lot of problems with CAP_BPF.  One of them was making BPF
>>>> firewalling more generally useful. By making CAP_BPF grant the ability
>>>> to read kernel memory, you will make administrators much more nervous
>>>> to grant CAP_BPF.
>>> 
>>> Andy, was your email hacked?
>>> I explained several times that in this proposal
>>> CAP_BPF _and_ CAP_TRACING _both_ are necessary to read kernel memory.
>>> CAP_BPF alone is _not enough_.
>> 
>> You have indeed said this many times.  You've stated it as a matter of
>> fact as though it cannot possibly be discussed.  I'm asking you to
>> justify it.
> 
> That's not how I see it.
> I kept stating that both CAP_BPF and CAP_TRACING are necessary to read
> kernel memory whereas you kept distorting my statement by dropping second
> part and then making claims that "CAP_BPF grant the ability to read
> kernel memory, you will make administrators much more nervous".

Mea culpa. CAP_BPF does, however, appear to permit breaking kASLR due to unsafe pointer conversions, and it allows reading and writing everyone’s maps.  I stand by my overall point.

> 
> Just s/CAP_BPF/CAP_BPF and CAP_TRACING/ in this above sentence.
> See that meaning suddenly changes?
> Now administrators would be worried about tasks that have both at once.
> They also would be worried about tasks that have CAP_TRACING alone,
> because that's what allows probe_kernel_read().

This is not at all what I meant. Of course granting CAP_BPF+CAP_TRACING allows reading kernel memory. This is not at all a problem.  Here is a problem I see:

CAP_TRACING + CAP_BPF allows modification of other people’s maps and potentially other things that should not be implied by CAP_TRACING alone and that don’t need to be available to tracers. So CAP_TRACING, which is powerful but has somewhat limited scope, isn’t fully useful without CAP_BPF, and giving CAP_TRACING *and* CAP_BPF allows things that tracers shouldn’t be able to do. I think this would make the whole mechanism less useful to Android, for example.

(Also, I’m not sure quite what you mean by “CAP_TRACING ... allows probe_kernel_read()”. probe_kernel_read() is a kernel function that can’t be directly called by userspace. CAP_TRACING allows reading kernel memory in plenty of ways regardless.)

> 
>> It seems like you are specifically trying to add a new switch to turn
>> as much of BPF as possible on and off.  Why?
> 
> Didn't I explain it several times already with multiple examples
> from systemd, daemons, bpftrace ?
> 
> Let's try again.
> Take your laptop with linux distro.
> You're the only user there. I'm assuming you're not sharing it with
> partner and kids. This is my definition of 'single user system'.
> You can sudo on it at any time, but obviously prefer to run as many
> apps as possible without cap_sys_admin.
> Now you found some awesome open source app on the web that monitors
> the health of the kernel and will pop a nice message on a screen if
> something is wrong. Currently this app needs root. You hesitate,
> but the app is so useful and it has strong upstream code review process
> that you keep running it 24/7.
> This is open source app. New versions come. You upgrade.
> You have enough trust in that app that you keep running it as root.
> But there is always a chance that new version doing accidentally
> something stupid as 'kill -9 -1'. It's an open source app at the end.
> 
> Now I come with this CAP* proposal to make this app safer.
> I'm not making your system more secure and not making this app
> more secure. I can only make your laptop safer for day to day work
> by limiting the operations this app can do.
> This particular app monitors the kernel via bpf and tracing.
> Hence you can give it CAP_TRACING and CAP_BPF and drop the rest.

This won’t make me much more comfortable, since CAP_BPF lets it do an ever-growing set of nasty things. I’d much rather one or both of two things happen:

1. Give it CAP_TRACING only. It can leak my data, but it’s rather hard for it to crash my laptop, lose data, or cause other shenanigans.

2. Improve it a bit so all the privileged ops are wrapped by capset().

Does this make sense?  I’m a security person on occasion. I find vulnerabilities and exploit them deliberately and I break things by accident on a regular basis. In my considered opinion, CAP_TRACING alone, even extended to cover part of BPF as I’ve described, is decently safe. Getting root with just CAP_TRACING will be decently challenging, especially if I don’t get to read things like sshd’s memory, and improvements to mitigate even that could be added.  I am quite confident that attacks starting with CAP_TRACING will have clear audit signatures if auditing is on.  I am also confident that CAP_BPF *will* allow DoS and likely privilege escalation, and this will only get more likely as BPF gets more widely used. And, if BPF-based auditing ever becomes a thing, writing to the audit daemon’s maps will be a great way to cover one’s tracks.


> I think they have no choice but to do kernel.unprivileged_bpf_disabled=1.
> We, as a kernel community, are forcing the users into it.
> Hence I really do not see a value in any proposal today that expands
> unprivileged bpf usage.

I think you’re overemphasizing bpf’s role in the whole speculation mess. I realize that you’ve spent an insane amount of time on mitigations to stupid issues. I’ve spent a less insane amount of time on mitigating similar issues outside of bpf.  It’s a mess.  At the end of the day, the kernel does its best, and new bugs show up. New CPUs will be less buggy. 

^ permalink raw reply

* Re: [PATCH net-next v2 6/9] r8169: don't use bit LastFrag in tx descriptor after send
From: Jakub Kicinski @ 2019-08-29  0:29 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: Realtek linux nic maintainers, David Miller,
	netdev@vger.kernel.org, Chun-Hao Lin
In-Reply-To: <5b4c94bf-4571-7b36-1d83-c169980a6867@gmail.com>

On Wed, 28 Aug 2019 22:27:30 +0200, Heiner Kallweit wrote:
> On RTL8125 this bit is always cleared after send. Therefore check for
> tx_skb->skb being set which is functionally equivalent.
> 
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 652bacf62..4489cd9f2 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -5713,7 +5713,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
>  
>  		rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
>  				     tp->TxDescArray + entry);
> -		if (status & LastFrag) {
> +		if (tx_skb->skb) {
>  			pkts_compl++;
>  			bytes_compl += tx_skb->skb->len;
>  			napi_consume_skb(tx_skb->skb, budget);

Hmm.. the dma_rmb() looks a little sus. Honestly I'm unclear on what it
was doing in the first place. READ_ONCE() should've been sufficient..

And it's not obviously clear what the smp_rmb() at the start of
the function pairs with.

But I don't think you're making anything worse here :)

^ permalink raw reply

* Re: [pull request][net-next v2 0/8] Mellanox, mlx5 updates 2019-08-22
From: Jakub Kicinski @ 2019-08-29  0:19 UTC (permalink / raw)
  To: Saeed Mahameed; +Cc: David S. Miller, netdev@vger.kernel.org
In-Reply-To: <20190828185720.2300-1-saeedm@mellanox.com>

On Wed, 28 Aug 2019 18:57:39 +0000, Saeed Mahameed wrote:
> This series provides some misc updates to mlx5 driver.
> For more information please see tag log below.
> 
> Please pull and let me know if there is any problem.
> 
> Please note that the series starts with a merge of mlx5-next branch,
> to resolve and avoid dependency with rdma tree.
> 
> v2: 
>  - Change statistics counter name to dev_internal_queue_oob as
>    suggested by Jakub.
>  - Fixed an issue with IP-in-IP TSO patch, found by regression testing.


Thanks! LGTM now

^ permalink raw reply

* Re: [PATCH V3 net 1/2] openvswitch: Properly set L4 keys on "later" IP fragments
From: Gregory Rose @ 2019-08-28 23:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, pshelar, joe
In-Reply-To: <20190828.145409.412910250799244993.davem@davemloft.net>

On 8/28/2019 2:54 PM, David Miller wrote:
> From: Greg Rose <gvrose8192@gmail.com>
> Date: Tue, 27 Aug 2019 07:58:09 -0700
>
>> When IP fragments are reassembled before being sent to conntrack, the
>> key from the last fragment is used.  Unless there are reordering
>> issues, the last fragment received will not contain the L4 ports, so the
>> key for the reassembled datagram won't contain them.  This patch updates
>> the key once we have a reassembled datagram.
>>
>> The handle_fragments() function works on L3 headers so we pull the L3/L4
>> flow key update code from key_extract into a new function
>> 'key_extract_l3l4'.  Then we add another new function
>> ovs_flow_key_update_l3l4() and export it so that it is accessible by
>> handle_fragments() for conntrack packet reassembly.
>>
>> Co-authored by: Justin Pettit <jpettit@ovn.org>
>> Signed-off-by: Greg Rose <gvrose8192@gmail.com>
> Applied with Co-authored-by fixed.
Thanks for fixing that up Dave.

- Greg

^ permalink raw reply

* auto-split of commit. Was: [PATCH bpf-next 04/10] tools/bpf: add libbpf_prog_type_(from|to)_str helpers
From: Alexei Starovoitov @ 2019-08-28 23:46 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Julia Kartseva, ast, Greg Kroah-Hartman, Thomas Gleixner, rdna,
	bpf, daniel, netdev, kernel-team
In-Reply-To: <20190828163422.3d167c4b@cakuba.netronome.com>

On Wed, Aug 28, 2019 at 04:34:22PM -0700, Jakub Kicinski wrote:
> 
> Greg, Thomas, libbpf is extracted from the kernel sources and
> maintained in a clone repo on GitHub for ease of packaging.
> 
> IIUC Alexei's concern is that since we are moving the commits from
> the kernel repo to the GitHub one we have to preserve the commits
> exactly as they are, otherwise SOB lines lose their power.
> 
> Can you provide some guidance on whether that's a valid concern, 
> or whether it's perfectly fine to apply a partial patch?

Right. That's exactly the concern.

Greg, Thomas,
could you please put your legal hat on and clarify the following.
Say some developer does a patch that modifies
include/uapi/linux/bpf.h
..some other kernel code...and
tools/include/uapi/linux/bpf.h

That tools/include/uapi/linux/bpf.h is used by perf and by libbpf.
We have automatic mirror of tools/libbpf into github/libbpf/
so that external projects can do git submodule of it,
can build packages out of it, etc.

The question is whether it's ok to split tools/* part out of
original commit, keep Author and SOB, create new commit out of it,
and automatically push that auto-generated commit into github mirror.

So far we've requested all developers to split their patches manually.
So that tools/* update is an individual commit that mirror can
simply git cherry-pick.


^ permalink raw reply

* Re: [PATCH net-next 0/4] mlxsw: Various updates
From: Jakub Kicinski @ 2019-08-28 23:45 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20190828155437.9852-1-idosch@idosch.org>

On Wed, 28 Aug 2019 18:54:33 +0300, Ido Schimmel wrote:
> From: Ido Schimmel <idosch@mellanox.com>
> 
> Patch #1 from Amit removes 56G speed support. The reasons for this are
> detailed in the commit message.
> 
> Patch #2 from Shalom ensures that the hardware does not auto negotiate
> the number of used lanes. For example, if a four lane port supports 100G
> over both two and four lanes, it will not advertise the two lane link
> mode.
> 
> Patch #3 bumps the firmware version supported by the driver.
> 
> Patch #4 from Petr adds ethtool counters to help debug the internal PTP
> implementation in mlxsw. I copied Richard on this patch in case he has
> comments.

LGTM

^ permalink raw reply

* Re: [PATCH v2] riscv: add support for SECCOMP and SECCOMP_FILTER
From: Kees Cook @ 2019-08-28 23:44 UTC (permalink / raw)
  To: David Abdurachmanov
  Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Oleg Nesterov,
	Andy Lutomirski, Will Drewry, Shuah Khan, Alexei Starovoitov,
	Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
	David Abdurachmanov, Thomas Gleixner, Allison Randal,
	Alexios Zavras, Anup Patel, Vincent Chen, Alan Kao, linux-riscv,
	linux-kernel, linux-kselftest, netdev, bpf, me
In-Reply-To: <CAEn-LToB1atxDvehBanVaxg6sk8zDkMe_CbqeTVgKNzOvD9-Sw@mail.gmail.com>

On Wed, Aug 28, 2019 at 02:37:34PM -0700, David Abdurachmanov wrote:
>     --disk path=$PWD/disk \
>     --boot kernel=$PWD/${FIRMWARE} \

This is where I tripped over things. How do I specify the kernel to boot
from OUTSIDE the disk image?

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH bpf-next] bpf, capabilities: introduce CAP_BPF
From: Alexei Starovoitov @ 2019-08-28 23:38 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Alexei Starovoitov, Kees Cook, LSM List, James Morris, Jann Horn,
	Peter Zijlstra, Masami Hiramatsu, Steven Rostedt, David S. Miller,
	Daniel Borkmann, Network Development, bpf, kernel-team, Linux API
In-Reply-To: <CALCETrX-bn2SpVzTkPz+A=z_oWDs7PNeouzK7wRWMzyaBd4+7g@mail.gmail.com>

On Tue, Aug 27, 2019 at 11:20:19PM -0700, Andy Lutomirski wrote:
> On Tue, Aug 27, 2019 at 9:49 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Aug 27, 2019 at 07:00:40PM -0700, Andy Lutomirski wrote:
> > >
> > > Let me put this a bit differently. Part of the point is that
> > > CAP_TRACING should allow a user or program to trace without being able
> > > to corrupt the system. CAP_BPF as you’ve proposed it *can* likely
> > > crash the system.
> >
> > Really? I'm still waiting for your example where bpf+kprobe crashes the system...
> >
> 
> That's not what I meant.  bpf+kprobe causing a crash is a bug.  I'm
> referring to a totally different issue.  On my laptop:
> 
> $ sudo bpftool map
> 48: hash  name foobar  flags 0x0
>     key 8B  value 8B  max_entries 64  memlock 8192B
> 181: lpm_trie  flags 0x1
>     key 8B  value 8B  max_entries 1  memlock 4096B
> 182: lpm_trie  flags 0x1
>     key 20B  value 8B  max_entries 1  memlock 4096B
> 183: lpm_trie  flags 0x1
>     key 8B  value 8B  max_entries 1  memlock 4096B
> 184: lpm_trie  flags 0x1
>     key 20B  value 8B  max_entries 1  memlock 4096B
> 185: lpm_trie  flags 0x1
>     key 8B  value 8B  max_entries 1  memlock 4096B
> 186: lpm_trie  flags 0x1
>     key 20B  value 8B  max_entries 1  memlock 4096B
> 187: lpm_trie  flags 0x1
>     key 8B  value 8B  max_entries 1  memlock 4096B
> 188: lpm_trie  flags 0x1
>     key 20B  value 8B  max_entries 1  memlock 4096B
> 
> $ sudo bpftool map dump id 186
> key:
> 00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
> 00 00 00 00
> value:
> 02 00 00 00 00 00 00 00
> Found 1 element
> 
> $ sudo bpftool map delete id 186 key hex 00 00 00 00 00 00 00 00 00 00
> 00 00 00 00 00 00 00 00 00 00
> [this worked]
> 
> I don't know what my laptop was doing with map id 186 in particular,
> but, whatever it was, I definitely broke it.  If a BPF firewall is in
> use on something important enough, this could easily remove
> connectivity from part or all of the system.  Right now, this needs
> CAP_SYS_ADMIN.  With your patch, CAP_BPF is sufficient to do this, but
> you *also* need CAP_BPF to trace the system using BPF.  Tracing with
> BPF is 'safe' in the absence of bugs.  Modifying other peoples' maps
> is not.

That lpm_trie is likely systemd implementing IP sandboxing.
Not sure whether it's white or black list.
Deleting an IP address from that map will either allow or disallow
network traffic.
Out of band operation on bpf map broke some bpf program. Sure.
But calling it 'breaking the system' is quite a stretch.
Calling it 'crashing the system' is plain wrong.
Yet you're generalizing this bpf map read/write as
"CAP_BPF as you’ve proposed it *can* likely crash the system."
This is what I have a problem with.

Anyway, changing gears...
Yes. I did propose to make a task with CAP_BPF to be able to
manipulate arbitrary maps in the system.
You could have said that if CAP_BPF is given to 'bpftool'
then any user will be able to mess with other maps because
bpftool is likely chmod-ed 755.
Absolutely correct!
It's not a fault of the CAP_BPF scope.
Just don't give that cap to bpftool or do different acl/chmod.

> If the answer is the latter, then maybe it would make sense to try to
> implement some of the unprivileged bpf stuff and then to see whether
> CAP_BPF is still needed.

<broken_record_mode=on> Nack to extensions to unprivileged bpf.


^ permalink raw reply

* Re: [PATCH bpf-next 04/10] tools/bpf: add libbpf_prog_type_(from|to)_str helpers
From: Jakub Kicinski @ 2019-08-28 23:34 UTC (permalink / raw)
  To: Julia Kartseva, ast, Greg Kroah-Hartman, Thomas Gleixner
  Cc: rdna, bpf, daniel, netdev, kernel-team
In-Reply-To: <467620c966825173dbd65b37a3f9bd7dd4fb8184.1567024943.git.hex@fb.com>

On Wed, 28 Aug 2019 14:03:07 -0700, Julia Kartseva wrote:
> Standardize string representation of prog types by putting commonly used
> names to libbpf.
> The prog_type to string mapping is taken from bpftool:
> tools/bpf/bpftool/main.h
> 
> Signed-off-by: Julia Kartseva <hex@fb.com>

This "libbpf patches have to be completely separate" just went to
another level :/ Now we are splitting code moves into add and remove
parts which are 5 patches apart? How are we supposed to review this?


Greg, Thomas, libbpf is extracted from the kernel sources and
maintained in a clone repo on GitHub for ease of packaging.

IIUC Alexei's concern is that since we are moving the commits from
the kernel repo to the GitHub one we have to preserve the commits
exactly as they are, otherwise SOB lines lose their power.

Can you provide some guidance on whether that's a valid concern, 
or whether it's perfectly fine to apply a partial patch?

(HW vendors also back port tree-wide cleanups into their drivers,
 so if SOB lines are voided by git format-patch -- driver/path/
 that'd be quite an issue..)

> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 72e6e5eb397f..946a4d41f223 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -296,6 +296,35 @@ struct bpf_object {
>  };
>  #define obj_elf_valid(o)	((o)->efile.elf)
>  
> +static const char *const prog_type_strs[] = {
> +	[BPF_PROG_TYPE_UNSPEC] = "unspec",
> +	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
> +	[BPF_PROG_TYPE_KPROBE] = "kprobe",
> +	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
> +	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
> +	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
> +	[BPF_PROG_TYPE_XDP] = "xdp",
> +	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
> +	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
> +	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
> +	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
> +	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
> +	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
> +	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
> +	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
> +	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
> +	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
> +	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
> +	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
> +	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
> +	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
> +	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
> +	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
> +	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
> +	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
> +	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
> +};
> +
>  void bpf_program__unload(struct bpf_program *prog)
>  {
>  	int i;
> @@ -4632,6 +4661,28 @@ int libbpf_attach_type_by_name(const char *name,
>  	return -EINVAL;
>  }
>  
> +int libbpf_prog_type_from_str(const char *str, enum bpf_prog_type *type)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(prog_type_strs); i++)
> +		if (prog_type_strs[i] && strcmp(prog_type_strs[i], str) == 0) {
> +			*type = i;
> +			return 0;
> +		}
> +
> +	return -EINVAL;
> +}
> +
> +int libbpf_prog_type_to_str(enum bpf_prog_type type, const char **str)
> +{
> +	if (type < BPF_PROG_TYPE_UNSPEC || type >= ARRAY_SIZE(prog_type_strs))
> +		return -EINVAL;
> +
> +	*str = prog_type_strs[type];
> +	return 0;
> +}
> +
>  static int
>  bpf_program__identify_section(struct bpf_program *prog,
>  			      enum bpf_prog_type *prog_type,
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index e8f70977d137..6846c488d8a2 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -122,12 +122,20 @@ LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
>  				    bpf_object_clear_priv_t clear_priv);
>  LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
>  
> +/* Program and expected attach types by section name */
>  LIBBPF_API int
>  libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
>  			 enum bpf_attach_type *expected_attach_type);
> +/* Attach type by section name */
>  LIBBPF_API int libbpf_attach_type_by_name(const char *name,
>  					  enum bpf_attach_type *attach_type);
>  
> +/* String representation of program type */
> +LIBBPF_API int libbpf_prog_type_from_str(const char *str,
> +					 enum bpf_prog_type *type);
> +LIBBPF_API int libbpf_prog_type_to_str(enum bpf_prog_type type,
> +				       const char **str);
> +
>  /* Accessors of bpf_program */
>  struct bpf_program;
>  LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index 664ce8e7a60e..2ea7c99f1579 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -188,4 +188,6 @@ LIBBPF_0.0.4 {
>  LIBBPF_0.0.5 {
>  	global:
>  		bpf_btf_get_next_id;
> +		libbpf_prog_type_from_str;
> +		libbpf_prog_type_to_str;
>  } LIBBPF_0.0.4;


^ permalink raw reply

* Re: [PATCH net-next 00/12] net: hns3: add some cleanups and optimizations
From: Jakub Kicinski @ 2019-08-28 23:19 UTC (permalink / raw)
  To: Huazhong Tan
  Cc: davem, netdev, linux-kernel, salil.mehta, yisen.zhuang, linuxarm,
	Andrew Lunn
In-Reply-To: <1567002196-63242-1-git-send-email-tanhuazhong@huawei.com>

On Wed, 28 Aug 2019 22:23:04 +0800, Huazhong Tan wrote:
> This patch-set includes cleanups, optimizations and bugfix for
> the HNS3 ethernet controller driver.

The phy loopback (patch 10) could probably benefit from an expert look
but in general LGTM.

^ permalink raw reply

* Re: [PATCH net v4 0/2] r8152: fix side effect
From: David Miller @ 2019-08-28 23:17 UTC (permalink / raw)
  To: hayeswang; +Cc: netdev, nic_swsd, linux-kernel
In-Reply-To: <1394712342-15778-323-Taiwan-albertk@realtek.com>

From: Hayes Wang <hayeswang@realtek.com>
Date: Wed, 28 Aug 2019 20:56:11 +0800

> v4:
> Add Fixes tag for both patch #1 and #2.

I applied v3, sorry.

I think it is OK as I will backport things to v5.2 -stable anyways.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox