Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 10/12] net: sched: cls_u32: get rid of tp_c
From: Jamal Hadi Salim @ 2018-10-08 10:22 UTC (permalink / raw)
  To: davem; +Cc: xiyou.wangcong, jiri, netdev, viro, Jamal Hadi Salim
In-Reply-To: <20181008102244.22212-1-jhs@emojatatu.com>

From: Al Viro <viro@zeniv.linux.org.uk>

Both hnode ->tp_c and tp_c argument of u32_set_parms()
the latter is redundant, the former - never read...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 net/sched/cls_u32.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 3ed2c9866b36..3d4c360f9b0c 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -79,7 +79,6 @@ struct tc_u_hnode {
 	struct tc_u_hnode __rcu	*next;
 	u32			handle;
 	u32			prio;
-	struct tc_u_common	*tp_c;
 	int			refcnt;
 	unsigned int		divisor;
 	struct idr		handle_idr;
@@ -390,7 +389,6 @@ static int u32_init(struct tcf_proto *tp)
 	tp_c->refcnt++;
 	RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
 	rcu_assign_pointer(tp_c->hlist, root_ht);
-	root_ht->tp_c = tp_c;
 
 	rcu_assign_pointer(tp->root, root_ht);
 	tp->data = tp_c;
@@ -761,7 +759,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 };
 
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
-			 unsigned long base, struct tc_u_common *tp_c,
+			 unsigned long base,
 			 struct tc_u_knode *n, struct nlattr **tb,
 			 struct nlattr *est, bool ovr,
 			 struct netlink_ext_ack *extack)
@@ -782,7 +780,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 		}
 
 		if (handle) {
-			ht_down = u32_lookup_ht(tp_c, handle);
+			ht_down = u32_lookup_ht(tp->data, handle);
 
 			if (!ht_down) {
 				NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
@@ -956,7 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 		if (!new)
 			return -ENOMEM;
 
-		err = u32_set_parms(net, tp, base, tp_c, new, tb,
+		err = u32_set_parms(net, tp, base, new, tb,
 				    tca[TCA_RATE], ovr, extack);
 
 		if (err) {
@@ -1012,7 +1010,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 				return err;
 			}
 		}
-		ht->tp_c = tp_c;
 		ht->refcnt = 1;
 		ht->divisor = divisor;
 		ht->handle = handle;
@@ -1123,7 +1120,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 	}
 #endif
 
-	err = u32_set_parms(net, tp, base, tp_c, n, tb, tca[TCA_RATE], ovr,
+	err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
 			    extack);
 	if (err == 0) {
 		struct tc_u_knode __rcu **ins;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 11/12] net: sched: cls_u32: keep track of knodes count in tc_u_common
From: Jamal Hadi Salim @ 2018-10-08 10:22 UTC (permalink / raw)
  To: davem; +Cc: xiyou.wangcong, jiri, netdev, viro, Jamal Hadi Salim
In-Reply-To: <20181008102244.22212-1-jhs@emojatatu.com>

From: Al Viro <viro@zeniv.linux.org.uk>

allows to simplify u32_delete() considerably

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 net/sched/cls_u32.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 3d4c360f9b0c..61593bee08db 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -97,6 +97,7 @@ struct tc_u_common {
 	int			refcnt;
 	struct idr		handle_idr;
 	struct hlist_node	hnode;
+	long			knodes;
 };
 
 static inline unsigned int u32_hash_fold(__be32 key,
@@ -452,6 +453,7 @@ static void u32_delete_key_freepf_work(struct work_struct *work)
 
 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 {
+	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_knode __rcu **kp;
 	struct tc_u_knode *pkp;
 	struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
@@ -462,6 +464,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 		     kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
 			if (pkp == key) {
 				RCU_INIT_POINTER(*kp, key->next);
+				tp_c->knodes--;
 
 				tcf_unbind_filter(tp, &key->res);
 				idr_remove(&ht->handle_idr, key->handle);
@@ -576,6 +579,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 			    struct netlink_ext_ack *extack)
 {
+	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_knode *n;
 	unsigned int h;
 
@@ -583,6 +587,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 		while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
 			RCU_INIT_POINTER(ht->ht[h],
 					 rtnl_dereference(n->next));
+			tp_c->knodes--;
 			tcf_unbind_filter(tp, &n->res);
 			u32_remove_hw_knode(tp, n, extack);
 			idr_remove(&ht->handle_idr, n->handle);
@@ -1141,6 +1146,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 
 		RCU_INIT_POINTER(n->next, pins);
 		rcu_assign_pointer(*ins, n);
+		tp_c->knodes++;
 		*arg = n;
 		return 0;
 	}
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 12/12] net: sched: cls_u32: simplify the hell out u32_delete() emptiness check
From: Jamal Hadi Salim @ 2018-10-08 10:22 UTC (permalink / raw)
  To: davem; +Cc: xiyou.wangcong, jiri, netdev, viro, Jamal Hadi Salim
In-Reply-To: <20181008102244.22212-1-jhs@emojatatu.com>

From: Al Viro <viro@zeniv.linux.org.uk>

Now that we have the knode count, we can instantly check if
any hnodes are non-empty.  And that kills the check for extra
references to root hnode - those could happen only if there was
a knode to carry such a link.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 net/sched/cls_u32.c | 48 +-----------------------------------------------
 1 file changed, 1 insertion(+), 47 deletions(-)

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 61593bee08db..ac79a40a0392 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -627,17 +627,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 	return -ENOENT;
 }
 
-static bool ht_empty(struct tc_u_hnode *ht)
-{
-	unsigned int h;
-
-	for (h = 0; h <= ht->divisor; h++)
-		if (rcu_access_pointer(ht->ht[h]))
-			return false;
-
-	return true;
-}
-
 static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
@@ -675,13 +664,9 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
 		      struct netlink_ext_ack *extack)
 {
 	struct tc_u_hnode *ht = arg;
-	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 	struct tc_u_common *tp_c = tp->data;
 	int ret = 0;
 
-	if (ht == NULL)
-		goto out;
-
 	if (TC_U32_KEY(ht->handle)) {
 		u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
 		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
@@ -702,38 +687,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
 	}
 
 out:
-	*last = true;
-	if (root_ht) {
-		if (root_ht->refcnt > 1) {
-			*last = false;
-			goto ret;
-		}
-		if (root_ht->refcnt == 1) {
-			if (!ht_empty(root_ht)) {
-				*last = false;
-				goto ret;
-			}
-		}
-	}
-
-	if (tp_c->refcnt > 1) {
-		*last = false;
-		goto ret;
-	}
-
-	if (tp_c->refcnt == 1) {
-		struct tc_u_hnode *ht;
-
-		for (ht = rtnl_dereference(tp_c->hlist);
-		     ht;
-		     ht = rtnl_dereference(ht->next))
-			if (!ht_empty(ht)) {
-				*last = false;
-				break;
-			}
-	}
-
-ret:
+	*last = tp_c->refcnt == 1 && tp_c->knodes == 0;
 	return ret;
 }
 
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH] isdn/gigaset: mark expected switch fall-throughs
From: David Miller @ 2018-10-08 17:36 UTC (permalink / raw)
  To: gustavo; +Cc: pebolle, isdn, gigaset307x-common, netdev, linux-kernel
In-Reply-To: <20181008114046.GA27098@embeddedor.com>

From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 8 Oct 2018 13:40:46 +0200

> Replace "--v-- fall through --v--" with a proper "fall through"
> annotation. Also, change "bad cid: fall through" to
> "fall through - bad cid".
> 
> This fix is part of the ongoing efforts to enabling -Wimplicit-fallthrough
> 
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>

Applied.

^ permalink raw reply

* [PATCH v2 0/3] bpf: allow zero-initialising hash map seed
From: Lorenz Bauer @ 2018-10-08 10:32 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linux-api, Lorenz Bauer
In-Reply-To: <20181001104509.24211-1-lmb@cloudflare.com>

Allow forcing the seed of a hash table to zero, for deterministic
execution during benchmarking and testing.

Comments adressed from v1:
* Add comment to discourage production use to linux/bpf.h
* Require CAP_SYS_ADMIN

Lorenz Bauer (3):
  bpf: allow zero-initializing hash map seed
  tools: sync linux/bpf.h
  tools: add selftest for BPF_F_ZERO_SEED

 include/uapi/linux/bpf.h                |  2 +
 kernel/bpf/hashtab.c                    | 13 ++++-
 tools/include/uapi/linux/bpf.h          |  2 +
 tools/testing/selftests/bpf/test_maps.c | 68 +++++++++++++++++++++----
 4 files changed, 72 insertions(+), 13 deletions(-)

-- 
2.17.1

^ permalink raw reply

* [PATCH v2 1/3] bpf: allow zero-initializing hash map seed
From: Lorenz Bauer @ 2018-10-08 10:32 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linux-api, Lorenz Bauer
In-Reply-To: <20181008103221.13468-1-lmb@cloudflare.com>

Add a new flag BPF_F_ZERO_SEED, which forces a hash map
to initialize the seed to zero. This is useful when doing
performance analysis both on individual BPF programs, as
well as the kernel's hash table implementation.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/hashtab.c     | 13 +++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..2c121f862082 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -253,6 +253,8 @@ enum bpf_attach_type {
 #define BPF_F_NO_COMMON_LRU	(1U << 1)
 /* Specify numa node during map creation */
 #define BPF_F_NUMA_NODE		(1U << 2)
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED		(1U << 6)
 
 /* flags for BPF_PROG_QUERY */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 2c1790288138..4b7c76765d9d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -23,7 +23,7 @@
 
 #define HTAB_CREATE_FLAG_MASK						\
 	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
-	 BPF_F_RDONLY | BPF_F_WRONLY)
+	 BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
 
 struct bucket {
 	struct hlist_nulls_head head;
@@ -244,6 +244,7 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	 */
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
 	int numa_node = bpf_map_attr_numa_node(attr);
 
 	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
@@ -257,6 +258,10 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 		 */
 		return -EPERM;
 
+	if (zero_seed && !capable(CAP_SYS_ADMIN))
+		/* Guard against local DoS, and discourage production use. */
+		return -EPERM;
+
 	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
 		/* reserved bits should not be used */
 		return -EINVAL;
@@ -373,7 +378,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (!htab->buckets)
 		goto free_htab;
 
-	htab->hashrnd = get_random_int();
+	if (htab->map.map_flags & BPF_F_ZERO_SEED)
+		htab->hashrnd = 0;
+	else
+		htab->hashrnd = get_random_int();
+
 	for (i = 0; i < htab->n_buckets; i++) {
 		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
 		raw_spin_lock_init(&htab->buckets[i].lock);
-- 
2.17.1

^ permalink raw reply related

* [PATCH v2 2/3] tools: sync linux/bpf.h
From: Lorenz Bauer @ 2018-10-08 10:32 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linux-api, Lorenz Bauer
In-Reply-To: <20181008103221.13468-1-lmb@cloudflare.com>

Synchronize changes to linux/bpf.h from
commit 88db241b34bf ("bpf: allow zero-initializing hash map seed").

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 tools/include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..2c121f862082 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -253,6 +253,8 @@ enum bpf_attach_type {
 #define BPF_F_NO_COMMON_LRU	(1U << 1)
 /* Specify numa node during map creation */
 #define BPF_F_NUMA_NODE		(1U << 2)
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED		(1U << 6)
 
 /* flags for BPF_PROG_QUERY */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
-- 
2.17.1

^ permalink raw reply related

* [PATCH v2 3/3] tools: add selftest for BPF_F_ZERO_SEED
From: Lorenz Bauer @ 2018-10-08 10:32 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linux-api, Lorenz Bauer
In-Reply-To: <20181008103221.13468-1-lmb@cloudflare.com>

Check that iterating two separate hash maps produces the same
order of keys if BPF_F_ZERO_SEED is used.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 tools/testing/selftests/bpf/test_maps.c | 68 +++++++++++++++++++++----
 1 file changed, 57 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..a8d6af27803a 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -257,23 +257,35 @@ static void test_hashmap_percpu(int task, void *data)
 	close(fd);
 }
 
+static int helper_fill_hashmap(int max_entries)
+{
+	int i, fd, ret;
+	long long key, value;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    max_entries, map_flags);
+	CHECK(fd < 0,
+	      "failed to create hashmap",
+	      "err: %s, flags: 0x%x\n", strerror(errno), map_flags);
+
+	for (i = 0; i < max_entries; i++) {
+		key = i; value = key;
+		ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+		CHECK(ret != 0,
+		      "can't update hashmap",
+		      "err: %s\n", strerror(ret));
+	}
+
+	return fd;
+}
+
 static void test_hashmap_walk(int task, void *data)
 {
 	int fd, i, max_entries = 1000;
 	long long key, value, next_key;
 	bool next_key_valid = true;
 
-	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
-			    max_entries, map_flags);
-	if (fd < 0) {
-		printf("Failed to create hashmap '%s'!\n", strerror(errno));
-		exit(1);
-	}
-
-	for (i = 0; i < max_entries; i++) {
-		key = i; value = key;
-		assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
-	}
+	fd = helper_fill_hashmap(max_entries);
 
 	for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
 					 &next_key) == 0; i++) {
@@ -305,6 +317,39 @@ static void test_hashmap_walk(int task, void *data)
 	close(fd);
 }
 
+static void test_hashmap_zero_seed(void)
+{
+	int i, first, second, old_flags;
+	long long key, next_first, next_second;
+
+	old_flags = map_flags;
+	map_flags |= BPF_F_ZERO_SEED;
+
+	first = helper_fill_hashmap(3);
+	second = helper_fill_hashmap(3);
+
+	for (i = 0; ; i++) {
+		void *key_ptr = !i ? NULL : &key;
+
+		if (bpf_map_get_next_key(first, key_ptr, &next_first) != 0)
+			break;
+
+		CHECK(bpf_map_get_next_key(second, key_ptr, &next_second) != 0,
+		      "next_key for second map must succeed",
+		      "key_ptr: %p", key_ptr);
+		CHECK(next_first != next_second,
+		      "keys must match",
+		      "i: %d first: %lld second: %lld\n", i,
+		      next_first, next_second);
+
+		key = next_first;
+	}
+
+	map_flags = old_flags;
+	close(first);
+	close(second);
+}
+
 static void test_arraymap(int task, void *data)
 {
 	int key, next_key, fd;
@@ -1417,6 +1462,7 @@ static void run_all_tests(void)
 	test_hashmap(0, NULL);
 	test_hashmap_percpu(0, NULL);
 	test_hashmap_walk(0, NULL);
+	test_hashmap_zero_seed();
 
 	test_arraymap(0, NULL);
 	test_arraymap_percpu(0, NULL);
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH net-next] net/ipv6: stop leaking percpu memory in fib6 info
From: David Miller @ 2018-10-08 17:46 UTC (permalink / raw)
  To: rppt; +Cc: dsahern, netdev, stable
In-Reply-To: <1539000363-25333-1-git-send-email-rppt@linux.vnet.ibm.com>

From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Mon,  8 Oct 2018 15:06:03 +0300

> The fib6_info_alloc() function allocates percpu memory to hold per CPU
> pointers to rt6_info, but this memory is never freed. Fix it.
> 
> Fixes: a64efe142f5e ("net/ipv6: introduce fib6_info struct and helpers")
> 

Please do not put empty lines between Fixes: and other tags.  They
belong together as a group.

> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> Cc: stable@vger.kernel.org

Several problems here.

You cannot submit patches to net-next and expect them to go onward to
-stable.  That is not appropriate.  If it is going to stable, you
must target 'net' not 'net-next'.

Furthermore, for networking changes, explicit CC: of stable is not
appropriate.  Instead you must explicitly ask me to queue the patch
up for -stable as I handle networking stable submissions myself.

Thank you.

^ permalink raw reply

* Re: [PATCH v2 net-next 13/23] rtnetlink: Update ipmr_rtm_dumplink for strict data checking
From: Christian Brauner @ 2018-10-08 10:43 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-14-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:34PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update ipmr_rtm_dumplink for strict data checking. If the flag is set,
> the dump request is expected to have an ifinfomsg struct as the header.
> All elements of the struct are expected to be 0 and no attributes can
> be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/ipv4/ipmr.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 5660adcf7a04..e7322e407bb4 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -2710,6 +2710,31 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
>  	return true;
>  }
>  
> +static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
> +			       struct netlink_ext_ack *extack)
> +{
> +	struct ifinfomsg *ifm;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
> +		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
> +		return -EINVAL;
> +	}
> +
> +	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
> +		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
> +		return -EINVAL;
> +	}
> +
> +	ifm = nlmsg_data(nlh);
> +	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
> +	    ifm->ifi_change || ifm->ifi_index) {
> +		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
>  {
>  	struct net *net = sock_net(skb->sk);
> @@ -2718,6 +2743,13 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
>  	unsigned int e = 0, s_e;
>  	struct mr_table *mrt;
>  
> +	if (cb->strict_check) {
> +		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);
> +
> +		if (err < 0)
> +			return err;
> +	}
> +
>  	s_t = cb->args[0];
>  	s_e = cb->args[1];
>  
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH] isdn/gigaset/isocdata: mark expected switch fall-through
From: David Miller @ 2018-10-08 17:54 UTC (permalink / raw)
  To: gustavo; +Cc: pebolle, isdn, gigaset307x-common, netdev, linux-kernel
In-Reply-To: <20181008121539.GA29109@embeddedor.com>

From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 8 Oct 2018 14:15:39 +0200

> Notice that in this particular case, I replaced the
> "--v-- fall through --v--" comment with a proper
> "fall through", which is what GCC is expecting to
> find.
> 
> This fix is part of the ongoing efforts to enabling
> -Wimplicit-fallthrough
> 
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>

Applied to net-next.

^ permalink raw reply

* Re: [PATCH v2 net-next 15/23] net/neighbor: Update neigh_dump_info for strict data checking
From: Christian Brauner @ 2018-10-08 10:47 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-16-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:36PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update neigh_dump_info for strict data checking. If the flag is set,
> the dump request is expected to have an ndmsg struct as the header
> potentially followed by one or more attributes. Any data passed in the
> header or as an attribute is taken as a request to influence the data
> returned. Only values supported by the dump handler are allowed to be
> non-0 or set in the request. At the moment only the NDA_IFINDEX and
> NDA_MASTER attributes are supported.
> 
> Existing code does not fail the dump if nlmsg_parse fails. That behavior
> is kept for non-strict checking.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/core/neighbour.c | 82 ++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 67 insertions(+), 15 deletions(-)
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index b06f794bf91e..7c8a3a0ee059 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -2426,11 +2426,73 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
>  
>  }
>  
> +static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
> +				bool strict_check,
> +				struct neigh_dump_filter *filter,
> +				struct netlink_ext_ack *extack)
> +{
> +	struct nlattr *tb[NDA_MAX + 1];
> +	int err, i;
> +
> +	if (strict_check) {
> +		struct ndmsg *ndm;
> +
> +		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
> +			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
> +			return -EINVAL;
> +		}
> +
> +		ndm = nlmsg_data(nlh);
> +		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
> +		    ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) {
> +			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
> +			return -EINVAL;
> +		}
> +
> +		err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
> +					 NULL, extack);
> +	} else {
> +		err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
> +				  NULL, extack);
> +	}
> +	if (err < 0)
> +		return err;
> +
> +	for (i = 0; i <= NDA_MAX; ++i) {
> +		if (!tb[i])
> +			continue;
> +
> +		/* all new attributes should require strict_check */
> +		switch (i) {
> +		case NDA_IFINDEX:
> +			if (nla_len(tb[i]) != sizeof(u32)) {
> +				NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
> +				return -EINVAL;
> +			}
> +			filter->dev_idx = nla_get_u32(tb[i]);
> +			break;
> +		case NDA_MASTER:
> +			if (nla_len(tb[i]) != sizeof(u32)) {
> +				NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
> +				return -EINVAL;
> +			}
> +			filter->master_idx = nla_get_u32(tb[i]);
> +			break;
> +		default:
> +			if (strict_check) {
> +				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
>  {
>  	const struct nlmsghdr *nlh = cb->nlh;
>  	struct neigh_dump_filter filter = {};
> -	struct nlattr *tb[NDA_MAX + 1];
>  	struct neigh_table *tbl;
>  	int t, family, s_t;
>  	int proxy = 0;
> @@ -2445,20 +2507,10 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
>  	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
>  		proxy = 1;
>  
> -	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL,
> -			  cb->extack);
> -	if (!err) {
> -		if (tb[NDA_IFINDEX]) {
> -			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
> -				return -EINVAL;
> -			filter.dev_idx = nla_get_u32(tb[NDA_IFINDEX]);
> -		}
> -		if (tb[NDA_MASTER]) {
> -			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
> -				return -EINVAL;
> -			filter.master_idx = nla_get_u32(tb[NDA_MASTER]);
> -		}
> -	}
> +	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
> +	if (err < 0 && cb->strict_check)
> +		return err;
> +
>  	s_t = cb->args[0];
>  
>  	for (t = 0; t < NEIGH_NR_TABLES; t++) {
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 16/23] net/neighbor: Update neightbl_dump_info for strict data checking
From: Christian Brauner @ 2018-10-08 10:47 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-17-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:37PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update neightbl_dump_info for strict data checking. If the flag is set,
> the dump request is expected to have an ndtmsg struct as the header.
> All elements of the struct are expected to be 0 and no attributes can
> be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/core/neighbour.c | 38 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 7c8a3a0ee059..dc1389b8beb1 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -2164,15 +2164,47 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
>  	return err;
>  }
>  
> +static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
> +				    struct netlink_ext_ack *extack)
> +{
> +	struct ndtmsg *ndtm;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
> +		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
> +		return -EINVAL;
> +	}
> +
> +	ndtm = nlmsg_data(nlh);
> +	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
> +		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
> +		return -EINVAL;
> +	}
> +
> +	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
> +		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
>  {
> +	const struct nlmsghdr *nlh = cb->nlh;
>  	struct net *net = sock_net(skb->sk);
>  	int family, tidx, nidx = 0;
>  	int tbl_skip = cb->args[0];
>  	int neigh_skip = cb->args[1];
>  	struct neigh_table *tbl;
>  
> -	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
> +	if (cb->strict_check) {
> +		int err = neightbl_valid_dump_info(nlh, cb->extack);
> +
> +		if (err < 0)
> +			return err;
> +	}
> +
> +	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
>  
>  	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
>  		struct neigh_parms *p;
> @@ -2185,7 +2217,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
>  			continue;
>  
>  		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
> -				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
> +				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
>  				       NLM_F_MULTI) < 0)
>  			break;
>  
> @@ -2200,7 +2232,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
>  
>  			if (neightbl_fill_param_info(skb, tbl, p,
>  						     NETLINK_CB(cb->skb).portid,
> -						     cb->nlh->nlmsg_seq,
> +						     nlh->nlmsg_seq,
>  						     RTM_NEWNEIGHTBL,
>  						     NLM_F_MULTI) < 0)
>  				goto out;
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 18/23] net/fib_rules: Update fib_nl_dumprule for strict data checking
From: Christian Brauner @ 2018-10-08 10:48 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-19-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:39PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update fib_nl_dumprule for strict data checking. If the flag is set,
> the dump request is expected to have fib_rule_hdr struct as the header.
> All elements of the struct are expected to be 0 and no attributes can
> be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/core/fib_rules.c | 36 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
> index 0ff3953f64aa..ffbb827723a2 100644
> --- a/net/core/fib_rules.c
> +++ b/net/core/fib_rules.c
> @@ -1063,13 +1063,47 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
>  	return err;
>  }
>  
> +static int fib_valid_dumprule_req(const struct nlmsghdr *nlh,
> +				   struct netlink_ext_ack *extack)
> +{
> +	struct fib_rule_hdr *frh;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
> +		NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request");
> +		return -EINVAL;
> +	}
> +
> +	frh = nlmsg_data(nlh);
> +	if (frh->dst_len || frh->src_len || frh->tos || frh->table ||
> +	    frh->res1 || frh->res2 || frh->action || frh->flags) {
> +		NL_SET_ERR_MSG(extack,
> +			       "Invalid values in header for fib rule dump request");
> +		return -EINVAL;
> +	}
> +
> +	if (nlmsg_attrlen(nlh, sizeof(*frh))) {
> +		NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
>  {
> +	const struct nlmsghdr *nlh = cb->nlh;
>  	struct net *net = sock_net(skb->sk);
>  	struct fib_rules_ops *ops;
>  	int idx = 0, family;
>  
> -	family = rtnl_msg_family(cb->nlh);
> +	if (cb->strict_check) {
> +		int err = fib_valid_dumprule_req(nlh, cb->extack);
> +
> +		if (err < 0)
> +			return err;
> +	}
> +
> +	family = rtnl_msg_family(nlh);
>  	if (family != AF_UNSPEC) {
>  		/* Protocol specific dump request */
>  		ops = lookup_rules_ops(net, family);
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 20/23] net: Update netconf dump handlers for strict data checking
From: Christian Brauner @ 2018-10-08 10:51 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-21-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:41PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update inet_netconf_dump_devconf, inet6_netconf_dump_devconf, and
> mpls_netconf_dump_devconf for strict data checking. If the flag is set,
> the dump request is expected to have an netconfmsg struct as the header.
> The struct only has the family member and no attributes can be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/ipv4/devinet.c  | 22 +++++++++++++++++++---
>  net/ipv6/addrconf.c | 22 +++++++++++++++++++---
>  net/mpls/af_mpls.c  | 18 +++++++++++++++++-
>  3 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> index 6f2bbd04e950..d122ebbe5980 100644
> --- a/net/ipv4/devinet.c
> +++ b/net/ipv4/devinet.c
> @@ -2086,6 +2086,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
>  static int inet_netconf_dump_devconf(struct sk_buff *skb,
>  				     struct netlink_callback *cb)
>  {
> +	const struct nlmsghdr *nlh = cb->nlh;
>  	struct net *net = sock_net(skb->sk);
>  	int h, s_h;
>  	int idx, s_idx;
> @@ -2093,6 +2094,21 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
>  	struct in_device *in_dev;
>  	struct hlist_head *head;
>  
> +	if (cb->strict_check) {
> +		struct netlink_ext_ack *extack = cb->extack;
> +		struct netconfmsg *ncm;
> +
> +		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
> +			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
> +			return -EINVAL;
> +		}
> +
> +		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
> +			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
> +			return -EINVAL;
> +		}
> +	}
> +
>  	s_h = cb->args[0];
>  	s_idx = idx = cb->args[1];
>  
> @@ -2112,7 +2128,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
>  			if (inet_netconf_fill_devconf(skb, dev->ifindex,
>  						      &in_dev->cnf,
>  						      NETLINK_CB(cb->skb).portid,
> -						      cb->nlh->nlmsg_seq,
> +						      nlh->nlmsg_seq,
>  						      RTM_NEWNETCONF,
>  						      NLM_F_MULTI,
>  						      NETCONFA_ALL) < 0) {
> @@ -2129,7 +2145,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
>  		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
>  					      net->ipv4.devconf_all,
>  					      NETLINK_CB(cb->skb).portid,
> -					      cb->nlh->nlmsg_seq,
> +					      nlh->nlmsg_seq,
>  					      RTM_NEWNETCONF, NLM_F_MULTI,
>  					      NETCONFA_ALL) < 0)
>  			goto done;
> @@ -2140,7 +2156,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
>  		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
>  					      net->ipv4.devconf_dflt,
>  					      NETLINK_CB(cb->skb).portid,
> -					      cb->nlh->nlmsg_seq,
> +					      nlh->nlmsg_seq,
>  					      RTM_NEWNETCONF, NLM_F_MULTI,
>  					      NETCONFA_ALL) < 0)
>  			goto done;
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index ce071d85ad00..2496b12bf721 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -666,6 +666,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
>  static int inet6_netconf_dump_devconf(struct sk_buff *skb,
>  				      struct netlink_callback *cb)
>  {
> +	const struct nlmsghdr *nlh = cb->nlh;
>  	struct net *net = sock_net(skb->sk);
>  	int h, s_h;
>  	int idx, s_idx;
> @@ -673,6 +674,21 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
>  	struct inet6_dev *idev;
>  	struct hlist_head *head;
>  
> +	if (cb->strict_check) {
> +		struct netlink_ext_ack *extack = cb->extack;
> +		struct netconfmsg *ncm;
> +
> +		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
> +			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
> +			return -EINVAL;
> +		}
> +
> +		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
> +			NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
> +			return -EINVAL;
> +		}
> +	}
> +
>  	s_h = cb->args[0];
>  	s_idx = idx = cb->args[1];
>  
> @@ -692,7 +708,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
>  			if (inet6_netconf_fill_devconf(skb, dev->ifindex,
>  						       &idev->cnf,
>  						       NETLINK_CB(cb->skb).portid,
> -						       cb->nlh->nlmsg_seq,
> +						       nlh->nlmsg_seq,
>  						       RTM_NEWNETCONF,
>  						       NLM_F_MULTI,
>  						       NETCONFA_ALL) < 0) {
> @@ -709,7 +725,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
>  		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
>  					       net->ipv6.devconf_all,
>  					       NETLINK_CB(cb->skb).portid,
> -					       cb->nlh->nlmsg_seq,
> +					       nlh->nlmsg_seq,
>  					       RTM_NEWNETCONF, NLM_F_MULTI,
>  					       NETCONFA_ALL) < 0)
>  			goto done;
> @@ -720,7 +736,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
>  		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
>  					       net->ipv6.devconf_dflt,
>  					       NETLINK_CB(cb->skb).portid,
> -					       cb->nlh->nlmsg_seq,
> +					       nlh->nlmsg_seq,
>  					       RTM_NEWNETCONF, NLM_F_MULTI,
>  					       NETCONFA_ALL) < 0)
>  			goto done;
> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
> index 0458c8aa5c11..7f891ffffc05 100644
> --- a/net/mpls/af_mpls.c
> +++ b/net/mpls/af_mpls.c
> @@ -1263,6 +1263,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
>  static int mpls_netconf_dump_devconf(struct sk_buff *skb,
>  				     struct netlink_callback *cb)
>  {
> +	const struct nlmsghdr *nlh = cb->nlh;
>  	struct net *net = sock_net(skb->sk);
>  	struct hlist_head *head;
>  	struct net_device *dev;
> @@ -1270,6 +1271,21 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
>  	int idx, s_idx;
>  	int h, s_h;
>  
> +	if (cb->strict_check) {
> +		struct netlink_ext_ack *extack = cb->extack;
> +		struct netconfmsg *ncm;
> +
> +		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
> +			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
> +			return -EINVAL;
> +		}
> +
> +		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
> +			NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
> +			return -EINVAL;
> +		}
> +	}
> +
>  	s_h = cb->args[0];
>  	s_idx = idx = cb->args[1];
>  
> @@ -1286,7 +1302,7 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
>  				goto cont;
>  			if (mpls_netconf_fill_devconf(skb, mdev,
>  						      NETLINK_CB(cb->skb).portid,
> -						      cb->nlh->nlmsg_seq,
> +						      nlh->nlmsg_seq,
>  						      RTM_NEWNETCONF,
>  						      NLM_F_MULTI,
>  						      NETCONFA_ALL) < 0) {
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH net-next] mt76x0: pci: fix set external PA I/O current
From: David Miller @ 2018-10-08 18:02 UTC (permalink / raw)
  To: yuehaibing
  Cc: kvalo, nbd, lorenzo.bianconi, sgruszka, linux-wireless,
	kernel-janitors, netdev, linux-kernel
In-Reply-To: <1539004909-195005-1-git-send-email-yuehaibing@huawei.com>

From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 8 Oct 2018 13:21:49 +0000

> Fixes gcc '-Wunused-but-set-variable' warning:
> 
> drivers/net/wireless/mediatek/mt76/mt76x0/pci.c: In function 'mt76x0e_register_device':
> drivers/net/wireless/mediatek/mt76/mt76x0/pci.c:107:8: warning:
>  variable 'data' set but not used [-Wunused-but-set-variable]
> 
> It seems correct value to write is 'data'
> 
> Fixes: 2b2cb40bcd7d ("mt76x0: pci: add hw initialization at bootstrap")
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>

Wireless changes should be marked as appropriate in the Subject line
for targetting the wireless GIT tree, not "net-next".

^ permalink raw reply

* Re: [PATCH v2 net-next 19/23] net/ipv6: Update ip6addrlbl_dump for strict data checking
From: Christian Brauner @ 2018-10-08 10:51 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-20-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:40PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update ip6addrlbl_dump for strict data checking. If the flag is set,
> the dump request is expected to have an ifaddrlblmsg struct as the
> header. All elements of the struct are expected to be 0 and no
> attributes can be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/ipv6/addrlabel.c | 34 +++++++++++++++++++++++++++++++++-
>  1 file changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
> index 1d6ced37ad71..0d1ee82ee55b 100644
> --- a/net/ipv6/addrlabel.c
> +++ b/net/ipv6/addrlabel.c
> @@ -458,20 +458,52 @@ static int ip6addrlbl_fill(struct sk_buff *skb,
>  	return 0;
>  }
>  
> +static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
> +				     struct netlink_ext_ack *extack)
> +{
> +	struct ifaddrlblmsg *ifal;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
> +		NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request");
> +		return -EINVAL;
> +	}
> +
> +	ifal = nlmsg_data(nlh);
> +	if (ifal->__ifal_reserved || ifal->ifal_prefixlen ||
> +	    ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) {
> +		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request");
> +		return -EINVAL;
> +	}
> +
> +	if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
> +		NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  {
> +	const struct nlmsghdr *nlh = cb->nlh;
>  	struct net *net = sock_net(skb->sk);
>  	struct ip6addrlbl_entry *p;
>  	int idx = 0, s_idx = cb->args[0];
>  	int err;
>  
> +	if (cb->strict_check) {
> +		err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
> +		if (err < 0)
> +			return err;
> +	}
> +
>  	rcu_read_lock();
>  	hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
>  		if (idx >= s_idx) {
>  			err = ip6addrlbl_fill(skb, p,
>  					      net->ipv6.ip6addrlbl_table.seq,
>  					      NETLINK_CB(cb->skb).portid,
> -					      cb->nlh->nlmsg_seq,
> +					      nlh->nlmsg_seq,
>  					      RTM_NEWADDRLABEL,
>  					      NLM_F_MULTI);
>  			if (err < 0)
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH] ethtool: fix a privilege escalation bug
From: Michal Kubecek @ 2018-10-08 18:03 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Kangjie Lu, David S. Miller, Florian Fainelli, Kees Cook,
	Andrew Lunn, Edward Cree, Ilya Lesokhin, Yury Norov, Alan Brady,
	Stephen Hemminger, open list:NETWORKING [GENERAL], open list
In-Reply-To: <1539013777-1625-1-git-send-email-wang6495@umn.edu>

On Mon, Oct 08, 2018 at 10:49:35AM -0500, Wenwen Wang wrote:
> In dev_ethtool(), the eth command 'ethcmd' is firstly copied from the
> use-space buffer 'useraddr' and checked to see whether it is
> ETHTOOL_PERQUEUE. If yes, the sub-command 'sub_cmd' is further copied from
> the user space. Otherwise, 'sub_cmd' is the same as 'ethcmd'. Next,
> according to 'sub_cmd', a permission check is enforced through the function
> ns_capable(). For example, the permission check is required if 'sub_cmd' is
> ETHTOOL_SCOALESCE, but it is not necessary if 'sub_cmd' is
> ETHTOOL_GCOALESCE, as suggested in the comment "Allow some commands to be
> done by anyone". The following execution invokes different handlers
> according to 'ethcmd'. Specifically, if 'ethcmd' is ETHTOOL_PERQUEUE,
> ethtool_set_per_queue() is called. In ethtool_set_per_queue(), the kernel
> object 'per_queue_opt' is copied again from the user-space buffer
> 'useraddr' and 'per_queue_opt.sub_command' is used to determine which
> operation should be performed. Given that the buffer 'useraddr' is in the
> user space, a malicious user can race to change the sub-command between the
> two copies. In particular, the attacker can supply ETHTOOL_PERQUEUE and
> ETHTOOL_GCOALESCE to bypass the permission check in dev_ethtool(). Then
> before ethtool_set_per_queue() is called, the attacker changes
> ETHTOOL_GCOALESCE to ETHTOOL_SCOALESCE. In this way, the attacker can
> bypass the permission check and execute ETHTOOL_SCOALESCE.
> 
> This patch enforces a check in ethtool_set_per_queue() after the second
> copy from 'useraddr'. If the sub-command is different from the one obtained
> in the first copy in dev_ethtool(), an error code EINVAL will be returned.
> 
> Signed-off-by: Wenwen Wang <wang6495@umn.edu>

Reviewed-by: Michal Kubecek <mkubecek@suse.cz>

I'm just not sure if "privilege escalation" is an appropriate term but
at least some sources define it loosely enough to cover also a simple
permission check bypass like this.

Michal Kubecek

> ---
>  net/core/ethtool.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
> index c9993c6..ccb337e 100644
> --- a/net/core/ethtool.c
> +++ b/net/core/ethtool.c
> @@ -2462,13 +2462,17 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev,
>  	return ret;
>  }
>  
> -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
> +static int ethtool_set_per_queue(struct net_device *dev,
> +				 void __user *useraddr, u32 sub_cmd)
>  {
>  	struct ethtool_per_queue_op per_queue_opt;
>  
>  	if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
>  		return -EFAULT;
>  
> +	if (per_queue_opt.sub_command != sub_cmd)
> +		return -EINVAL;
> +
>  	switch (per_queue_opt.sub_command) {
>  	case ETHTOOL_GCOALESCE:
>  		return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
> @@ -2838,7 +2842,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
>  		rc = ethtool_get_phy_stats(dev, useraddr);
>  		break;
>  	case ETHTOOL_PERQUEUE:
> -		rc = ethtool_set_per_queue(dev, useraddr);
> +		rc = ethtool_set_per_queue(dev, useraddr, sub_cmd);
>  		break;
>  	case ETHTOOL_GLINKSETTINGS:
>  		rc = ethtool_get_link_ksettings(dev, useraddr);
> -- 
> 2.7.4
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 17/23] net/namespace: Update rtnl_net_dumpid for strict data checking
From: Christian Brauner @ 2018-10-08 10:54 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-18-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:38PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update rtnl_net_dumpid for strict data checking. If the flag is set,
> the dump request is expected to have an rtgenmsg struct as the header
> which has the family as the only element. No data may be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  net/core/net_namespace.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 670c84b1bfc2..fefe72774aeb 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -853,6 +853,12 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
>  		.s_idx = cb->args[0],
>  	};
>  
> +	if (cb->strict_check &&

Hm, shouldn't this also verify that the passed header is indeed struct
rtgenmsg before checking whether there are any attributes?

> +	    nlmsg_attrlen(cb->nlh, sizeof(struct rtgenmsg))) {
> +			NL_SET_ERR_MSG(cb->extack, "Unknown data in network namespace id dump request");
> +			return -EINVAL;
> +	}
> +
>  	spin_lock_bh(&net->nsid_lock);
>  	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
>  	spin_unlock_bh(&net->nsid_lock);
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 21/23] net/bridge: Update br_mdb_dump for strict data checking
From: Christian Brauner @ 2018-10-08 10:55 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-22-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:42PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update br_mdb_dump for strict data checking. If the flag is set,
> the dump request is expected to have a br_port_msg struct as the
> header. All elements of the struct are expected to be 0 and no
> attributes can be appended.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/bridge/br_mdb.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
> index a4a848bf827b..a7ea2d431714 100644
> --- a/net/bridge/br_mdb.c
> +++ b/net/bridge/br_mdb.c
> @@ -162,6 +162,29 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
>  	return err;
>  }
>  
> +static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh,
> +				 struct netlink_ext_ack *extack)
> +{
> +	struct br_port_msg *bpm;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) {
> +		NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request");
> +		return -EINVAL;
> +	}
> +
> +	bpm = nlmsg_data(nlh);
> +	if (bpm->ifindex) {
> +		NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request");
> +		return -EINVAL;
> +	}
> +	if (nlmsg_attrlen(nlh, sizeof(*bpm))) {
> +		NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  {
>  	struct net_device *dev;
> @@ -169,6 +192,13 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	struct nlmsghdr *nlh = NULL;
>  	int idx = 0, s_idx;
>  
> +	if (cb->strict_check) {
> +		int err = br_mdb_valid_dump_req(cb->nlh, cb->extack);
> +
> +		if (err < 0)
> +			return err;
> +	}
> +
>  	s_idx = cb->args[0];
>  
>  	rcu_read_lock();
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 22/23] rtnetlink: Move input checking for rtnl_fdb_dump to helper
From: Christian Brauner @ 2018-10-08 11:01 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-23-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:43PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Move the existing input checking for rtnl_fdb_dump into a helper,
> valid_fdb_dump_legacy. This function will retain the current
> logic that works around the 2 headers that userspace has been
> allowed to send up to this point.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/core/rtnetlink.c | 53 ++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 33 insertions(+), 20 deletions(-)
> 
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index f6d2609cfa9f..c7509c789fb6 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3799,22 +3799,13 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(ndo_dflt_fdb_dump);
>  
> -static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
> +static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
> +				 int *br_idx, int *brport_idx,
> +				 struct netlink_ext_ack *extack)
>  {
> -	struct net_device *dev;
> +	struct ifinfomsg *ifm = nlmsg_data(nlh);

You could move this cast after the

if (nlmsg_len(nlh) != sizeof(struct ndmsg) &&
    (nlmsg_len(nlh) != sizeof(struct ndmsg) +

check. It doesn't matter that much but it minimizes the risk of someone
accidently accessing struct ifinfomsg *ifm when it's an invalid cast.


>  	struct nlattr *tb[IFLA_MAX+1];
> -	struct net_device *br_dev = NULL;
> -	const struct net_device_ops *ops = NULL;
> -	const struct net_device_ops *cops = NULL;
> -	struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
> -	struct net *net = sock_net(skb->sk);
> -	struct hlist_head *head;
> -	int brport_idx = 0;
> -	int br_idx = 0;
> -	int h, s_h;
> -	int idx = 0, s_idx;
> -	int err = 0;
> -	int fidx = 0;
> +	int err;
>  
>  	/* A hack to preserve kernel<->userspace interface.
>  	 * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
> @@ -3823,20 +3814,42 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	 * Fortunately these sizes don't conflict with the size of ifinfomsg
>  	 * with an optional attribute.
>  	 */
> -	if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
> -	    (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
> +	if (nlmsg_len(nlh) != sizeof(struct ndmsg) &&
> +	    (nlmsg_len(nlh) != sizeof(struct ndmsg) +
>  	     nla_attr_size(sizeof(u32)))) {
> -		err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
> -				  IFLA_MAX, ifla_policy, cb->extack);
> +		err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
> +				  ifla_policy, extack);
>  		if (err < 0) {
>  			return -EINVAL;
>  		} else if (err == 0) {
>  			if (tb[IFLA_MASTER])
> -				br_idx = nla_get_u32(tb[IFLA_MASTER]);
> +				*br_idx = nla_get_u32(tb[IFLA_MASTER]);
>  		}
>  
> -		brport_idx = ifm->ifi_index;
> +		*brport_idx = ifm->ifi_index;
>  	}
> +	return 0;
> +}
> +
> +static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
> +{
> +	struct net_device *dev;
> +	struct net_device *br_dev = NULL;
> +	const struct net_device_ops *ops = NULL;
> +	const struct net_device_ops *cops = NULL;
> +	struct net *net = sock_net(skb->sk);
> +	struct hlist_head *head;
> +	int brport_idx = 0;
> +	int br_idx = 0;
> +	int h, s_h;
> +	int idx = 0, s_idx;
> +	int err = 0;
> +	int fidx = 0;
> +
> +	err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx,
> +				    cb->extack);
> +	if (err < 0)
> +		return err;
>  
>  	if (br_idx) {
>  		br_dev = __dev_get_by_index(net, br_idx);
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 23/23] rtnetlink: Update rtnl_fdb_dump for strict data checking
From: Christian Brauner @ 2018-10-08 11:02 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-24-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:44PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Update rtnl_fdb_dump for strict data checking. If the flag is set,
> the dump request is expected to have an ndmsg struct as the header
> potentially followed by one or more attributes. Any data passed in the
> header or as an attribute is taken as a request to influence the data
> returned. Only values supported by the dump handler are allowed to be
> non-0 or set in the request. At the moment only the NDA_IFINDEX and
> NDA_MASTER attributes are supported.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Acked-by: Christian Brauner <christian@brauner.io>

> ---
>  net/core/rtnetlink.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 60 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index c7509c789fb6..c894c4af8981 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3799,6 +3799,60 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(ndo_dflt_fdb_dump);
>  
> +static int valid_fdb_dump_strict(const struct nlmsghdr *nlh,
> +				 int *br_idx, int *brport_idx,
> +				 struct netlink_ext_ack *extack)
> +{
> +	struct nlattr *tb[NDA_MAX + 1];
> +	struct ndmsg *ndm;
> +	int err, i;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
> +		NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request");
> +		return -EINVAL;
> +	}
> +
> +	ndm = nlmsg_data(nlh);
> +	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
> +	    ndm->ndm_flags || ndm->ndm_type) {
> +		NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request");
> +		return -EINVAL;
> +	}
> +
> +	err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
> +				 NULL, extack);
> +	if (err < 0)
> +		return err;
> +
> +	*brport_idx = ndm->ndm_ifindex;
> +	for (i = 0; i <= NDA_MAX; ++i) {
> +		if (!tb[i])
> +			continue;
> +
> +		switch (i) {
> +		case NDA_IFINDEX:
> +			if (nla_len(tb[i]) != sizeof(u32)) {
> +				NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in fdb dump request");
> +				return -EINVAL;
> +			}
> +			*brport_idx = nla_get_u32(tb[NDA_IFINDEX]);
> +			break;
> +		case NDA_MASTER:
> +			if (nla_len(tb[i]) != sizeof(u32)) {
> +				NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in fdb dump request");
> +				return -EINVAL;
> +			}
> +			*br_idx = nla_get_u32(tb[NDA_MASTER]);
> +			break;
> +		default:
> +			NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb dump request");
> +			return -EINVAL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
>  				 int *br_idx, int *brport_idx,
>  				 struct netlink_ext_ack *extack)
> @@ -3846,8 +3900,12 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	int err = 0;
>  	int fidx = 0;
>  
> -	err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx,
> -				    cb->extack);
> +	if (cb->strict_check)
> +		err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx,
> +					    cb->extack);
> +	else
> +		err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx,
> +					    cb->extack);
>  	if (err < 0)
>  		return err;
>  
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH v2 net-next 00/23] rtnetlink: Add support for rigid checking of data in dump request
From: Christian Brauner @ 2018-10-08 11:04 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jbenc, stephen, David Ahern
In-Reply-To: <20181008031644.15989-1-dsahern@kernel.org>

On Sun, Oct 07, 2018 at 08:16:21PM -0700, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> There are many use cases where a user wants to influence what is
> returned in a dump for some rtnetlink command: one is wanting data
> for a different namespace than the one the request is received and
> another is limiting the amount of data returned in the dump to a
> specific set of interest to userspace, reducing the cpu overhead of
> both kernel and userspace. Unfortunately, the kernel has historically
> not been strict with checking for the proper header or checking the
> values passed in the header. This lenient implementation has allowed
> iproute2 and other packages to pass any struct or data in the dump
> request as long as the family is the first byte. For example, ifinfomsg
> struct is used by iproute2 for all generic dump requests - links,
> addresses, routes and rules when it is really only valid for link
> requests.
> 
> There is 1 is example where the kernel deals with the wrong struct: link
> dumps after VF support was added. Older iproute2 was sending rtgenmsg as
> the header instead of ifinfomsg so a patch was added to try and detect
> old userspace vs new:
> e5eca6d41f53 ("rtnetlink: fix userspace API breakage for iproute2 < v3.9.0")
> 
> The latest example is Christian's patch set wanting to return addresses for
> a target namespace. It guesses the header struct is an ifaddrmsg and if it
> guesses wrong a netlink warning is generated in the kernel log on every
> address dump which is unacceptable.
> 
> Another example where the kernel is a bit lenient is route dumps: iproute2
> can send either a request with either ifinfomsg or a rtmsg as the header
> struct, yet the kernel always treats the header as an rtmsg (see
> inet_dump_fib and rtm_flags check). The header inconsistency impacts the
> ability to add kernel side filters for route dumps - a necessary feature
> for scale setups with 100k+ routes.
> 
> How to resolve the problem of not breaking old userspace yet be able to
> move forward with new features such as kernel side filtering which are
> crucial for efficient operation at high scale?
> 
> This patch set addresses the problem by adding a new socket flag,
> NETLINK_DUMP_STRICT_CHK, that userspace can use with setsockopt to
> request strict checking of headers and attributes on dump requests and
> hence unlock the ability to use kernel side filters as they are added.
> 
> Kernel side, the dump handlers are updated to verify the message contains
> at least the expected header struct:
>     RTM_GETLINK:       ifinfomsg
>     RTM_GETADDR:       ifaddrmsg
>     RTM_GETMULTICAST:  ifaddrmsg
>     RTM_GETANYCAST:    ifaddrmsg
>     RTM_GETADDRLABEL:  ifaddrlblmsg
>     RTM_GETROUTE:      rtmsg
>     RTM_GETSTATS:      if_stats_msg
>     RTM_GETNEIGH:      ndmsg
>     RTM_GETNEIGHTBL:   ndtmsg
>     RTM_GETNSID:       rtgenmsg
>     RTM_GETRULE:       fib_rule_hdr
>     RTM_GETNETCONF:    netconfmsg
>     RTM_GETMDB:        br_port_msg
> 
> And then every field in the header struct should be 0 with the exception
> of the family. There are a few exceptions to this rule where the kernel
> already influences the data returned by values in the struct. Next the
> message should not contain attributes unless the kernel implements
> filtering for it. Any unexpected data causes the dump to fail with EINVAL.
> If the new flag is honored by the kernel and the dump contents adjusted
> by any data passed in the request, the dump handler can set the
> NLM_F_DUMP_FILTERED flag in the netlink message header.
> 
> For old userspace on new kernel there is no impact as all checks are
> wrapped in a check on the new strict flag. For new userspace on old
> kernel, the data in the headers and any appended attributes are
> silently ignored though the setsockopt failing is the clue to userspace
> the feature is not supported. New userspace on new kernel gets the
> requested data dump.
> 
> iproute2 patches can be found here:
>     https://github.com/dsahern/iproute2 dump-enhancements
> 
> Major changes since v1
> - inner header is supposed to be 4-bytes aligned. So for dumps that
>   should not have attributes appended changed the check to use:
>         if (nlmsg_attrlen(nlh, sizeof(hdr)))
>   Only impacts patches with headers that are not multiples of 4-bytes
>   (rtgenmsg, netconfmsg), but applied the change to all patches not
>   calling nlmsg_parse for consistency.
> 
> - Added nlmsg_parse_strict and nla_parse_strict for tighter control on
>   attribute parsing. There should be no unknown attribute types or extra
>   bytes.
> 
> - Moved validation to a helper in most cases
> 
> Changes since rfc-v2
> - dropped the NLM_F_DUMP_FILTERED flag from target nsid dumps per
>   Jiri's objections
> - changed the opt-in uapi from a netlink message flag to a socket
>   flag. setsockopt provides an api for userspace to definitively
>   know if the kernel supports strict checking on dumps.
> - re-ordered patches to peel off the extack on dumps if needed to
>   keep this set size within limits
> - misc cleanups in patches based on testing
> 
> David Ahern (23):
>   netlink: Pass extack to dump handlers
>   netlink: Add extack message to nlmsg_parse for invalid header length
>   net: Add extack to nlmsg_parse
>   netlink: Add strict version of nlmsg_parse and nla_parse
>   net/ipv6: Refactor address dump to push inet6_fill_args to
>     in6_dump_addrs
>   netlink: Add new socket option to enable strict checking on dumps
>   net/ipv4: Update inet_dump_ifaddr for strict data checking
>   net/ipv6: Update inet6_dump_addr for strict data checking
>   rtnetlink: Update rtnl_dump_ifinfo for strict data checking
>   rtnetlink: Update rtnl_bridge_getlink for strict data checking
>   rtnetlink: Update rtnl_stats_dump for strict data checking
>   rtnetlink: Update inet6_dump_ifinfo for strict data checking
>   rtnetlink: Update ipmr_rtm_dumplink for strict data checking
>   rtnetlink: Update fib dumps for strict data checking
>   net/neighbor: Update neigh_dump_info for strict data checking
>   net/neighbor: Update neightbl_dump_info for strict data checking
>   net/namespace: Update rtnl_net_dumpid for strict data checking
>   net/fib_rules: Update fib_nl_dumprule for strict data checking
>   net/ipv6: Update ip6addrlbl_dump for strict data checking
>   net: Update netconf dump handlers for strict data checking
>   net/bridge: Update br_mdb_dump for strict data checking
>   rtnetlink: Move input checking for rtnl_fdb_dump to helper
>   rtnetlink: Update rtnl_fdb_dump for strict data checking

At this point it's all nits so it's got my ACK but keener eyes than mine
might see other issues.

Acked-by: Christian Brauner <christian@brauner.io>

> 
>  include/linux/netlink.h        |   2 +
>  include/net/ip_fib.h           |   2 +
>  include/net/netlink.h          |  21 ++-
>  include/uapi/linux/netlink.h   |   1 +
>  lib/nlattr.c                   |  48 +++++--
>  net/bridge/br_mdb.c            |  30 ++++
>  net/core/devlink.c             |   2 +-
>  net/core/fib_rules.c           |  36 ++++-
>  net/core/neighbour.c           | 119 ++++++++++++---
>  net/core/net_namespace.c       |   6 +
>  net/core/rtnetlink.c           | 318 ++++++++++++++++++++++++++++++++---------
>  net/ipv4/devinet.c             | 101 ++++++++++---
>  net/ipv4/fib_frontend.c        |  42 +++++-
>  net/ipv4/ipmr.c                |  39 +++++
>  net/ipv6/addrconf.c            | 177 ++++++++++++++++++-----
>  net/ipv6/addrlabel.c           |  34 ++++-
>  net/ipv6/ip6_fib.c             |   8 ++
>  net/ipv6/ip6mr.c               |   9 ++
>  net/ipv6/route.c               |   2 +-
>  net/mpls/af_mpls.c             |  28 +++-
>  net/netfilter/ipvs/ip_vs_ctl.c |   2 +-
>  net/netlink/af_netlink.c       |  33 ++++-
>  net/netlink/af_netlink.h       |   1 +
>  net/sched/act_api.c            |   2 +-
>  net/sched/cls_api.c            |   6 +-
>  net/sched/sch_api.c            |   2 +-
>  net/xfrm/xfrm_user.c           |   2 +-
>  27 files changed, 908 insertions(+), 165 deletions(-)
> 
> -- 
> 2.11.0
> 

^ permalink raw reply

* Re: [PATCH] ethtool: fix a privilege escalation bug
From: Michal Kubecek @ 2018-10-08 18:19 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Kangjie Lu, David S. Miller, Florian Fainelli, Kees Cook,
	Andrew Lunn, Edward Cree, Ilya Lesokhin, Yury Norov, Alan Brady,
	Stephen Hemminger, open list:NETWORKING [GENERAL], open list
In-Reply-To: <1539013777-1625-1-git-send-email-wang6495@umn.edu>

On Mon, Oct 08, 2018 at 10:49:35AM -0500, Wenwen Wang wrote:
> In dev_ethtool(), the eth command 'ethcmd' is firstly copied from the
> use-space buffer 'useraddr' and checked to see whether it is
> ETHTOOL_PERQUEUE. If yes, the sub-command 'sub_cmd' is further copied from
> the user space. Otherwise, 'sub_cmd' is the same as 'ethcmd'. Next,
> according to 'sub_cmd', a permission check is enforced through the function
> ns_capable(). For example, the permission check is required if 'sub_cmd' is
> ETHTOOL_SCOALESCE, but it is not necessary if 'sub_cmd' is
> ETHTOOL_GCOALESCE, as suggested in the comment "Allow some commands to be
> done by anyone". The following execution invokes different handlers
> according to 'ethcmd'. Specifically, if 'ethcmd' is ETHTOOL_PERQUEUE,
> ethtool_set_per_queue() is called. In ethtool_set_per_queue(), the kernel
> object 'per_queue_opt' is copied again from the user-space buffer
> 'useraddr' and 'per_queue_opt.sub_command' is used to determine which
> operation should be performed. Given that the buffer 'useraddr' is in the
> user space, a malicious user can race to change the sub-command between the
> two copies. In particular, the attacker can supply ETHTOOL_PERQUEUE and
> ETHTOOL_GCOALESCE to bypass the permission check in dev_ethtool(). Then
> before ethtool_set_per_queue() is called, the attacker changes
> ETHTOOL_GCOALESCE to ETHTOOL_SCOALESCE. In this way, the attacker can
> bypass the permission check and execute ETHTOOL_SCOALESCE.
> 
> This patch enforces a check in ethtool_set_per_queue() after the second
> copy from 'useraddr'. If the sub-command is different from the one obtained
> in the first copy in dev_ethtool(), an error code EINVAL will be returned.
> 

Fixes: f38d138a7da6 ("net/ethtool: support set coalesce per queue")

> Signed-off-by: Wenwen Wang <wang6495@umn.edu>
> ---
>  net/core/ethtool.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
> index c9993c6..ccb337e 100644
> --- a/net/core/ethtool.c
> +++ b/net/core/ethtool.c
> @@ -2462,13 +2462,17 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev,
>  	return ret;
>  }
>  
> -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
> +static int ethtool_set_per_queue(struct net_device *dev,
> +				 void __user *useraddr, u32 sub_cmd)
>  {
>  	struct ethtool_per_queue_op per_queue_opt;
>  
>  	if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
>  		return -EFAULT;
>  
> +	if (per_queue_opt.sub_command != sub_cmd)
> +		return -EINVAL;
> +
>  	switch (per_queue_opt.sub_command) {
>  	case ETHTOOL_GCOALESCE:
>  		return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
> @@ -2838,7 +2842,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
>  		rc = ethtool_get_phy_stats(dev, useraddr);
>  		break;
>  	case ETHTOOL_PERQUEUE:
> -		rc = ethtool_set_per_queue(dev, useraddr);
> +		rc = ethtool_set_per_queue(dev, useraddr, sub_cmd);
>  		break;
>  	case ETHTOOL_GLINKSETTINGS:
>  		rc = ethtool_get_link_ksettings(dev, useraddr);
> -- 
> 2.7.4
> 

^ permalink raw reply

* Re: [PATCH] bpf: fix building without CONFIG_INET
From: Joe Stringer @ 2018-10-08 18:24 UTC (permalink / raw)
  To: daniel
  Cc: liu.song.a23, arnd, ast, David Miller, john fastabend,
	Martin KaFai Lau, makita.toshiaki, brakmo, rdna, Jesper Brouer,
	jakub.kicinski, m.xhonneux, dsahern, netdev, LKML
In-Reply-To: <9330215e-27bf-9913-f7f9-6b5b83542a27@iogearbox.net>

On Mon, 8 Oct 2018 at 01:41, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 10/05/2018 09:07 PM, Daniel Borkmann wrote:
> > On 10/05/2018 09:02 PM, Song Liu wrote:
> > [...]
> >> BPF_CALL_x() has static already (before this patch). We should not
> >> need change that
> >> for all the BPF_CALL_?(). Joe's version looks better to me.
> >
> > My preference as well, thanks!
>
> ping, can someone send an updated patch?

I will send a fresh version shortly.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox