Netdev List

Netdev List
 help / color / mirror / Atom feed

* [RFC bpf-next 6/7] selftests/bpf: add flow dissector bpf_skb_load_bytes helper test
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev
In-Reply-To: <20190205173629.160717-1-sdf@google.com>

With the on-stack skb, we want to make sure we don't trigger any
shinfo access. Add a small test which tries to read the data past
the packet boundary.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/test_progs.c | 49 ++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c52bd90fbb34..c12f61efc427 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1995,6 +1995,54 @@ static void test_flow_dissector(void)
 	bpf_object__close(obj);
 }
 
+static void test_flow_dissector_load_bytes(void)
+{
+	struct bpf_flow_keys flow_keys;
+	__u32 duration, retval, size;
+	struct bpf_insn prog[] = {
+		// BPF_REG_1 - 1st argument: context
+		// BPF_REG_2 - 2nd argument: offset, start at last byte + 1
+		BPF_MOV64_IMM(BPF_REG_2, sizeof(pkt_v4)),
+		// BPF_REG_3 - 3rd argument: destination, reserve byte on stack
+		BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1),
+		// BPF_REG_4 - 4th argument: copy one byte
+		BPF_MOV64_IMM(BPF_REG_4, 1),
+		// bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1)
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_skb_load_bytes),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+		// if (ret == 0) return BPF_DROP (2)
+		BPF_MOV64_IMM(BPF_REG_0, BPF_DROP),
+		BPF_EXIT_INSN(),
+		// if (ret != 0) return BPF_OK (0)
+		BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
+		BPF_EXIT_INSN(),
+	};
+	int fd, err;
+
+	/* make sure bpf_skb_load_bytes helper doesn't cause any
+	 * problems when used with the fake skb in the flow
+	 * dissector (try to read past the last byte)
+	 */
+	fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+			      ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+	CHECK(fd < 0,
+	      "flow_dissector-bpf_skb_load_bytes-load",
+	      "fd %d errno %d\n",
+	      fd, errno);
+
+	err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
+				&flow_keys, &size, &retval, &duration);
+	CHECK(size != sizeof(flow_keys) || err || retval != 1,
+	      "flow_dissector-bpf_skb_load_bytes",
+	      "err %d errno %d retval %d duration %d size %u/%lu\n",
+	      err, errno, retval, duration, size, sizeof(flow_keys));
+
+	if (fd >= -1)
+		close(fd);
+}
+
 static void *test_spin_lock(void *arg)
 {
 	__u32 duration, retval;
@@ -2136,6 +2184,7 @@ int main(void)
 	test_queue_stack_map(QUEUE);
 	test_queue_stack_map(STACK);
 	test_flow_dissector();
+	test_flow_dissector_load_bytes();
 	test_spinlock();
 	test_map_lock();
 
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* [RFC bpf-next 5/7] bpf: when doing BPF_PROG_TEST_RUN for flow dissector use no-skb mode
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev
In-Reply-To: <20190205173629.160717-1-sdf@google.com>

Now that we have __flow_bpf_dissect which works on raw data (by
constructing temporary on-stack skb), use it when doing
BPF_PROG_TEST_RUN for flow dissector.

This should help us catch any possible bugs due to missing shinfo on
the on-stack skb.

Note that the existing __skb_flow_bpf_dissect swallows L2 headers and
returns nhoff=0; we need to preserve that behavior.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 net/bpf/test_run.c | 52 +++++++++++++++-------------------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2c5172b33209..502ae0e866d3 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -249,10 +249,8 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	u32 repeat = kattr->test.repeat;
 	struct bpf_flow_keys flow_keys;
 	u64 time_start, time_spent = 0;
-	struct bpf_skb_data_end *cb;
+	const struct ethhdr *eth;
 	u32 retval, duration;
-	struct sk_buff *skb;
-	struct sock *sk;
 	void *data;
 	int ret;
 	u32 i;
@@ -260,35 +258,14 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 		return -EINVAL;
 
-	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
-			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (size < ETH_HLEN)
+		return -EINVAL;
+
+	data = bpf_test_init(kattr, size, 0, 0);
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	sk = kzalloc(sizeof(*sk), GFP_USER);
-	if (!sk) {
-		kfree(data);
-		return -ENOMEM;
-	}
-	sock_net_set(sk, current->nsproxy->net_ns);
-	sock_init_data(NULL, sk);
-
-	skb = build_skb(data, 0);
-	if (!skb) {
-		kfree(data);
-		kfree(sk);
-		return -ENOMEM;
-	}
-	skb->sk = sk;
-
-	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-	__skb_put(skb, size);
-	skb->protocol = eth_type_trans(skb,
-				       current->nsproxy->net_ns->loopback_dev);
-	skb_reset_network_header(skb);
-
-	cb = (struct bpf_skb_data_end *)skb->cb;
-	cb->qdisc_cb.flow_keys = &flow_keys;
+	eth = (struct ethhdr *)data;
 
 	if (!repeat)
 		repeat = 1;
@@ -297,9 +274,15 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	for (i = 0; i < repeat; i++) {
 		preempt_disable();
 		rcu_read_lock();
-		retval = __skb_flow_bpf_dissect(prog, skb,
-						&flow_keys_dissector,
-						&flow_keys);
+		retval = __flow_bpf_dissect(prog, data,
+					    eth->h_proto, ETH_HLEN,
+					    size,
+					    &flow_keys_dissector,
+					    &flow_keys);
+		if (flow_keys.nhoff >= ETH_HLEN)
+			flow_keys.nhoff -= ETH_HLEN;
+		if (flow_keys.thoff >= ETH_HLEN)
+			flow_keys.thoff -= ETH_HLEN;
 		rcu_read_unlock();
 		preempt_enable();
 
@@ -317,8 +300,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 
 	ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
 			      retval, duration);
-
-	kfree_skb(skb);
-	kfree(sk);
+	kfree(data);
 	return ret;
+
 }
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* [RFC bpf-next 4/7] net: flow_dissector: handle no-skb use case
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev
In-Reply-To: <20190205173629.160717-1-sdf@google.com>

When flow_dissector is called without skb (with only data and hlen),
construct on-stack skb (which has a linear chunk of data passed
to the flow dissector). This should let us handle eth_get_headlen
case where only data is provided and we don't want to (yet) allocate
an skb.

Since this on-stack skb doesn't allocate its own data, we can't
add shinfo and need to be careful to avoid any code paths that use
it. Flow dissector BPF programs can only call bpf_skb_load_bytes helper,
which doesn't touch shinfo in our case (skb->len is the length of the
linear header so it exits early).

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/skbuff.h    |  5 +++
 net/core/flow_dissector.c | 95 +++++++++++++++++++++++++++++----------
 2 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aa9a9983de80..5f1c085cb34c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1227,6 +1227,11 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 			    const struct sk_buff *skb,
 			    struct flow_dissector *flow_dissector,
 			    struct bpf_flow_keys *flow_keys);
+bool __flow_bpf_dissect(struct bpf_prog *prog,
+			void *data, __be16 proto,
+			int nhoff, int hlen,
+			struct flow_dissector *flow_dissector,
+			struct bpf_flow_keys *flow_keys);
 bool __skb_flow_dissect(struct net *net,
 			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index dddcc37c0462..87167b74f59a 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -683,6 +683,28 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 	}
 }
 
+static inline void init_flow_keys(struct bpf_flow_keys *flow_keys,
+				  struct sk_buff *skb, int nhoff)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	memset(cb, 0, sizeof(*cb));
+	memset(flow_keys, 0, sizeof(*flow_keys));
+
+	flow_keys->nhoff = nhoff;
+	flow_keys->thoff = nhoff;
+
+	cb->qdisc_cb.flow_keys = flow_keys;
+}
+
+static inline void clamp_flow_keys(struct bpf_flow_keys *flow_keys,
+				   int hlen)
+{
+	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, hlen);
+	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
+				   flow_keys->nhoff, hlen);
+}
+
 bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 			    const struct sk_buff *skb,
 			    struct flow_dissector *flow_dissector,
@@ -702,13 +724,9 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 
 	/* Save Control Block */
 	memcpy(&cb_saved, cb, sizeof(cb_saved));
-	memset(cb, 0, sizeof(*cb));
 
 	/* Pass parameters to the BPF program */
-	memset(flow_keys, 0, sizeof(*flow_keys));
-	cb->qdisc_cb.flow_keys = flow_keys;
-	flow_keys->nhoff = skb_network_offset(skb);
-	flow_keys->thoff = flow_keys->nhoff;
+	init_flow_keys(flow_keys, skb, skb_network_offset(skb));
 
 	bpf_compute_data_pointers((struct sk_buff *)skb);
 	result = BPF_PROG_RUN(prog, skb);
@@ -716,9 +734,34 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 	/* Restore state */
 	memcpy(cb, &cb_saved, sizeof(cb_saved));
 
-	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
-	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
-				   flow_keys->nhoff, skb->len);
+	clamp_flow_keys(flow_keys, skb->len);
+
+	return result == BPF_OK;
+}
+
+bool __flow_bpf_dissect(struct bpf_prog *prog,
+			void *data, __be16 proto,
+			int nhoff, int hlen,
+			struct flow_dissector *flow_dissector,
+			struct bpf_flow_keys *flow_keys)
+{
+	struct bpf_skb_data_end *cb;
+	struct sk_buff skb;
+	u32 result;
+
+	__init_skb(&skb, data, hlen);
+	skb_put(&skb, hlen);
+	skb.protocol = proto;
+
+	init_flow_keys(flow_keys, &skb, nhoff);
+
+	cb = (struct bpf_skb_data_end *)skb.cb;
+	cb->data_meta = skb.data;
+	cb->data_end  = skb.data + skb_headlen(&skb);
+
+	result = BPF_PROG_RUN(prog, &skb);
+
+	clamp_flow_keys(flow_keys, hlen);
 
 	return result == BPF_OK;
 }
@@ -754,8 +797,10 @@ bool __skb_flow_dissect(struct net *net,
 	struct flow_dissector_key_icmp *key_icmp;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
-	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	struct bpf_prog *attached = NULL;
+	struct bpf_flow_keys flow_keys;
+	enum flow_dissect_ret fdret;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -795,30 +840,32 @@ bool __skb_flow_dissect(struct net *net,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
-	if (skb) {
-		struct bpf_flow_keys flow_keys;
-		struct bpf_prog *attached = NULL;
+	rcu_read_lock();
 
-		rcu_read_lock();
+	if (!net && skb)
+		net = skb_net(skb);
+	if (net)
+		attached = rcu_dereference(net->flow_dissector_prog);
 
-		if (!net && skb)
-			net = skb_net(skb);
-		if (net)
-			attached = rcu_dereference(net->flow_dissector_prog);
-		WARN_ON_ONCE(!net);
+	WARN_ON_ONCE(!net);
 
-		if (attached) {
+	if (attached) {
+		if (skb)
 			ret = __skb_flow_bpf_dissect(attached, skb,
 						     flow_dissector,
 						     &flow_keys);
-			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
-						 target_container);
-			rcu_read_unlock();
-			return ret;
-		}
+		else
+			ret = __flow_bpf_dissect(attached, data, proto, nhoff,
+						 hlen, flow_dissector,
+						 &flow_keys);
+		__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+					 target_container);
 		rcu_read_unlock();
+		return ret;
 	}
 
+	rcu_read_unlock();
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* [RFC bpf-next 3/7] net: plumb network namespace into __skb_flow_dissect
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev
In-Reply-To: <20190205173629.160717-1-sdf@google.com>

This new argument will be used in the next patches for the
eth_get_headlen use case. eth_get_headlen calls flow dissector
with only data (without skb) so there is currently no way to
pull attached BPF flow dissector program. With this new argument,
we can amend the callers to explicitly pass network namespace
so we can use attached BPF program.

Note: WARN_ON_ONCE(!net) will now trigger for eth_get_headlen users.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/skbuff.h    | 15 +++++++++------
 net/core/flow_dissector.c | 20 +++++++++++---------
 net/ethernet/eth.c        |  5 +++--
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 28723a86efdf..aa9a9983de80 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1227,7 +1227,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 			    const struct sk_buff *skb,
 			    struct flow_dissector *flow_dissector,
 			    struct bpf_flow_keys *flow_keys);
-bool __skb_flow_dissect(const struct sk_buff *skb,
+bool __skb_flow_dissect(struct net *net,
+			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
 			void *data, __be16 proto, int nhoff, int hlen,
@@ -1237,7 +1238,7 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
 				    struct flow_dissector *flow_dissector,
 				    void *target_container, unsigned int flags)
 {
-	return __skb_flow_dissect(skb, flow_dissector, target_container,
+	return __skb_flow_dissect(NULL, skb, flow_dissector, target_container,
 				  NULL, 0, 0, 0, flags);
 }
 
@@ -1246,18 +1247,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
 					      unsigned int flags)
 {
 	memset(flow, 0, sizeof(*flow));
-	return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
+	return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, flow,
 				  NULL, 0, 0, 0, flags);
 }
 
 static inline bool
-skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+skb_flow_dissect_flow_keys_basic(struct net *net,
+				 const struct sk_buff *skb,
 				 struct flow_keys_basic *flow, void *data,
 				 __be16 proto, int nhoff, int hlen,
 				 unsigned int flags)
 {
 	memset(flow, 0, sizeof(*flow));
-	return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
+	return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
 				  data, proto, nhoff, hlen, flags);
 }
 
@@ -2438,7 +2440,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 	if (skb_transport_header_was_set(skb))
 		return;
 
-	if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+	if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+					     NULL, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index bb1a54747d64..dddcc37c0462 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -725,6 +725,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
+ * @net: associated network namespace, if NULL pulled from skb
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
  * @flow_dissector: list of keys to dissect
  * @target_container: target structure to put dissected values into
@@ -739,7 +740,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
  *
  * Caller must take care of zeroing target container memory.
  */
-bool __skb_flow_dissect(const struct sk_buff *skb,
+bool __skb_flow_dissect(struct net *net,
+			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
 			void *data, __be16 proto, int nhoff, int hlen,
@@ -799,12 +801,11 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 
 		rcu_read_lock();
 
-		if (skb->dev)
-			attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
-		else if (skb->sk)
-			attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog);
-		else
-			WARN_ON_ONCE(1);
+		if (!net && skb)
+			net = skb_net(skb);
+		if (net)
+			attached = rcu_dereference(net->flow_dissector_prog);
+		WARN_ON_ONCE(!net);
 
 		if (attached) {
 			ret = __skb_flow_bpf_dissect(attached, skb,
@@ -1406,7 +1407,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
 	__flow_hash_secret_init();
 
 	memset(&keys, 0, sizeof(keys));
-	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+	__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, &keys,
 			   NULL, 0, 0, 0,
 			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
 
@@ -1508,7 +1509,8 @@ u32 skb_get_poff(const struct sk_buff *skb)
 {
 	struct flow_keys_basic keys;
 
-	if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+	if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+					      NULL, 0, 0, 0, 0))
 		return 0;
 
 	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 4c520110b04f..155d55025bfc 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -136,8 +136,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
-	if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto,
-					      sizeof(*eth), len, flags))
+	if (!skb_flow_dissect_flow_keys_basic(NULL, NULL, &keys, data,
+					      eth->h_proto, sizeof(*eth),
+					      len, flags))
 		return max_t(u32, keys.control.thoff, sizeof(*eth));
 
 	/* parse for any L4 headers */
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* [RFC bpf-next 2/7] net: introduce skb_net helper
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev
In-Reply-To: <20190205173629.160717-1-sdf@google.com>

skb_net returns network namespace from the associated device or socket.

This will be used in the next commit.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 10 ++++++++++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ad883ab2762c..28723a86efdf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4343,5 +4343,7 @@ static inline __wsum lco_csum(struct sk_buff *skb)
 	return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
 }
 
+struct net *skb_net(const struct sk_buff *skb);
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 23c9cf100bd4..016db13fa2b6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5585,6 +5585,16 @@ void skb_condense(struct sk_buff *skb)
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 }
 
+struct net *skb_net(const struct sk_buff *skb)
+{
+	if (skb->dev)
+		return dev_net(skb->dev);
+	else if (skb->sk)
+		return sock_net(skb->sk);
+	return NULL;
+}
+EXPORT_SYMBOL(skb_net);
+
 #ifdef CONFIG_SKB_EXTENSIONS
 static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
 {
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* [RFC bpf-next 1/7] net: introduce __init_skb and __init_skb_shinfo helpers
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev
In-Reply-To: <20190205173629.160717-1-sdf@google.com>

__init_skb is essentially a version of __build_skb which accepts skb as
an argument (instead of doing kmem_cache_alloc to allocate it).

__init_skb_shinfo initializes shinfo.

No functional changes.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 68 ++++++++++++++++++++----------------------
 2 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 831846617d07..ad883ab2762c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1001,6 +1001,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize);
 
+void __init_skb(struct sk_buff *skb, u8 *data, unsigned int size);
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
 			    int node);
 struct sk_buff *__build_skb(void *data, unsigned int frag_size);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 26d848484912..23c9cf100bd4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -160,6 +160,34 @@ static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
  *
  */
 
+void __init_skb(struct sk_buff *skb, u8 *data, unsigned int size)
+{
+	/* Only clear those fields we need to clear, not those that we will
+	 * actually initialise below. Hence, don't put any more fields after
+	 * the tail pointer in struct sk_buff!
+	 */
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	/* Account for allocated memory : skb + skb->head */
+	skb->truesize = SKB_TRUESIZE(size);
+	refcount_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+	skb->transport_header = (typeof(skb->transport_header))~0U;
+}
+
+static inline void __init_skb_shinfo(struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo;
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+}
+
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -181,7 +209,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 			    int flags, int node)
 {
 	struct kmem_cache *cache;
-	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
 	bool pfmemalloc;
@@ -215,27 +242,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	size = SKB_WITH_OVERHEAD(ksize(data));
 	prefetchw(data + size);
 
-	/*
-	 * Only clear those fields we need to clear, not those that we will
-	 * actually initialise below. Hence, don't put any more fields after
-	 * the tail pointer in struct sk_buff!
-	 */
-	memset(skb, 0, offsetof(struct sk_buff, tail));
-	/* Account for allocated memory : skb + skb->head */
-	skb->truesize = SKB_TRUESIZE(size);
+	__init_skb(skb, data, size);
+	__init_skb_shinfo(skb);
 	skb->pfmemalloc = pfmemalloc;
-	refcount_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-	skb->transport_header = (typeof(skb->transport_header))~0U;
-
-	/* make sure we initialize shinfo sequentially */
-	shinfo = skb_shinfo(skb);
-	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
-	atomic_set(&shinfo->dataref, 1);
 
 	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff_fclones *fclones;
@@ -277,7 +286,6 @@ EXPORT_SYMBOL(__alloc_skb);
  */
 struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
-	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	unsigned int size = frag_size ? : ksize(data);
 
@@ -287,20 +295,8 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 
 	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = SKB_TRUESIZE(size);
-	refcount_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-	skb->transport_header = (typeof(skb->transport_header))~0U;
-
-	/* make sure we initialize shinfo sequentially */
-	shinfo = skb_shinfo(skb);
-	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
-	atomic_set(&shinfo->dataref, 1);
+	__init_skb(skb, data, size);
+	__init_skb_shinfo(skb);
 
 	return skb;
 }
-- 
2.20.1.611.gfbb209baf1-goog


^ permalink raw reply related

* [RFC bpf-next 0/7] net: flow_dissector: trigger BPF hook when called from eth_get_headlen
From: Stanislav Fomichev @ 2019-02-05 17:36 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, simon.horman, willemb, Stanislav Fomichev

Currently, when eth_get_headlen calls flow dissector, it doesn't pass any
skb. Because we use passed skb to lookup associated networking namespace
to find whether we have a BPF program attached or not, we always use
C-based flow dissector in this case.

The goal of this patch series is to add a new network namespace argument
to eth_get_headlen and make BPF flow dissector programs work in the
skb-less case.

The series goes like this:
1. introduce __init_skb and __init_skb_shinfo; those will be used to
   initialize temporary skb
2. introduce skb_net which can be used to get networking namespace
   associated with an skb
3. add new optional network namespace argument to __skb_flow_dissect and
   plumb through the callers
4. add new __flow_bpf_dissect which constructs temporary on-stack skb
   (using __init_skb) and calls BPF flow dissector program
5. convert flow dissector BPF_PROG_TEST_RUN to skb-less mode to show that
   it works
6. add selftest that makes sure going over the packet bounds in
   bpf_skb_load_bytes with on-stack skb doesn't cause any problems
7. add new net namespace argument to eth_get_headlen and convert the
   callers

Stanislav Fomichev (7):
  net: introduce __init_skb and __init_skb_shinfo helpers
  net: introduce skb_net helper
  net: plumb network namespace into __skb_flow_dissect
  net: flow_dissector: handle no-skb use case
  bpf: when doing BPF_PROG_TEST_RUN for flow dissector use no-skb mode
  selftests/bpf: add flow dissector bpf_skb_load_bytes helper test
  net: flow_dissector: pass net argument to the eth_get_headlen

 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |   2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c |   3 +-
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   |   3 +-
 drivers/net/ethernet/intel/fm10k/fm10k_main.c |   2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |   3 +-
 drivers/net/ethernet/intel/iavf/iavf_txrx.c   |   2 +-
 drivers/net/ethernet/intel/ice/ice_txrx.c     |   2 +-
 drivers/net/ethernet/intel/igb/igb_main.c     |   2 +-
 drivers/net/ethernet/intel/igc/igc_main.c     |   2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   2 +-
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c |   3 +-
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   |   3 +-
 drivers/net/tun.c                             |   3 +-
 include/linux/etherdevice.h                   |   2 +-
 include/linux/skbuff.h                        |  23 +++-
 net/bpf/test_run.c                            |  52 +++------
 net/core/flow_dissector.c                     | 105 +++++++++++++-----
 net/core/skbuff.c                             |  78 +++++++------
 net/ethernet/eth.c                            |   8 +-
 tools/testing/selftests/bpf/test_progs.c      |  49 ++++++++
 20 files changed, 227 insertions(+), 122 deletions(-)

-- 
2.20.1.611.gfbb209baf1-goog

^ permalink raw reply

* Kernel panic in eth_header
From: Andrew @ 2019-02-05 16:09 UTC (permalink / raw)
  To: Netdev

Hi all.

After upgrading a PPPoE BRAS to kernel 4.9.153, I got a kernel panic
after 3 days of uptime.

Unfortunately, the kernel was compiled without debug info. I rebuilt it
with debug info enabled (the rebuilt kernel has the same function
addresses - I compared the vmlinux symbol maps), and it says that the
panic is in net/ethernet/eth.c:88.

The kernel panic trace is below. igb is from upstream, ver. 5.3.5.4.
What extra info is needed?

[263565.106441] BUG: unable to handle kernel paging request at 
ffff88015a4d2dd4
[263565.113527] IP: [<ffffffff8158e48b>] eth_header+0x3b/0xc0
[263565.119030] PGD 1e8f067 [263565.121474] PUD 0
[263565.123580]
[263565.125166] Oops: 0002 [#1] SMP
[263565.128398] Modules linked in: xt_nat iptable_nat nf_conntrack_ipv4 
nf_defrag_ipv4 nf_nat_ipv4 iptable_filter xt_length xt_TCPMSS xt_tcpudp 
xt_mark xt_dscp iptable_mangle ip_tables x_tables nf_nat_pptp 
nf_conntrack_pptp nf_conntrack_proto_gre nf_nat_proto_gre nf_nat 
nf_conntrack sch_sfq sch_htb cls_u32 sch_ingress sch_prio sch_tbf 
cls_flow cls_fw act_police ifb 8021q mrp garp stp llc softdog pppoe 
pppox ppp_generic slhc i2c_nforce2 i2c_core igb(O) parport_pc dca 
parport thermal asus_atk0110 fan ptp k10temp hwmon pps_core nv_tco
[263565.176083] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G           O    
4.9.153-x86_64 #1
[263565.183996] Hardware name: System manufacturer System Product 
Name/M2N-E, BIOS ASUS M2N-E ACPI BIOS Revision 5001 03/23/2010
[263565.195289] task: ffff88007d0f5200 task.stack: ffffc9000006c000
[263565.201295] RIP: 0010:[<ffffffff8158e48b>] [<ffffffff8158e48b>] 
eth_header+0x3b/0xc0
[263565.209225] RSP: 0018:ffff88007fa83c58  EFLAGS: 00010286
[263565.214622] RAX: ffff88015a4d2dc8 RBX: 0000000000000008 RCX: 
ffff8800682434a0
[263565.221843] RDX: ffff88015a4d2dc8 RSI: ffff88015a4d2dc8 RDI: 
ffff880077aab000
[263565.229062] RBP: ffff88007b663d90 R08: ffff88007b663d90 R09: 
0000000000000574
[263565.236281] R10: ffff88007d1fa000 R11: 0000000000000000 R12: 
ffff8800682434a0
[263565.243501] R13: ffff88007d1fa000 R14: 0000000000000574 R15: 
0000000000000008
[263565.250719] FS:  0000000000000000(0000) GS:ffff88007fa80000(0000) 
knlGS:0000000000000000
[263565.258894] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[263565.264725] CR2: ffff88015a4d2dd4 CR3: 000000007ad73000 CR4: 
00000000000006f0
[263565.271944] Stack:
[263565.274041]  ffff880077aab000 ffff880068243400 ffff88007a745000 
ffff8800682434a0
[263565.281582]  0000000000000002 ffffffff81571d09 ffff880068243400 
ffff88007fa83d00
[263565.289121]  ffff88007a745000 ffff880077aab000 ffff88007a712000 
ffffffff815a8c61
[263565.296661] Call Trace:
[263565.299193]  <IRQ> [263565.301205] [<ffffffff81571d09>] ? 
neigh_connected_output+0xa9/0x100
[263565.307740]  [<ffffffff815a8c61>] ? ip_finish_output2+0x221/0x400
[263565.313920]  [<ffffffff8159e144>] ? nf_iterate+0x54/0x60
[263565.319319]  [<ffffffff815ab2fa>] ? ip_output+0x6a/0xf0
[263565.324631]  [<ffffffff8159e102>] ? nf_iterate+0x12/0x60
[263565.330030]  [<ffffffff815aa6e0>] ? ip_fragment.constprop.5+0x80/0x80
[263565.336556]  [<ffffffff815a73b6>] ? ip_forward+0x396/0x480
[263565.342128]  [<ffffffff815a6fb0>] ? ip_check_defrag+0x1e0/0x1e0
[263565.348134]  [<ffffffff815a5a2e>] ? ip_rcv+0x2ae/0x370
[263565.353361]  [<ffffffffa0107c02>] ? pppoe_rcv_core+0xd2/0x160 [pppoe]
[263565.359888]  [<ffffffff815a5170>] ? ip_local_deliver_finish+0x1d0/0x1d0
[263565.366586]  [<ffffffff81562a57>] ? __netif_receive_skb_core+0x527/0xa80
[263565.373373]  [<ffffffff81567632>] ? process_backlog+0x92/0x130
[263565.379291]  [<ffffffff8156745d>] ? net_rx_action+0x24d/0x390
[263565.385124]  [<ffffffff81628374>] ? __do_softirq+0xf4/0x2a0
[263565.390784]  [<ffffffff8107136c>] ? irq_exit+0xbc/0xd0
[263565.396008]  [<ffffffff81626cd6>] ? 
call_function_single_interrupt+0x96/0xa0
[263565.403141]  <EOI> [263565.405153] [<ffffffff81623eb0>] ? 
__sched_text_end+0x2/0x2
[263565.410907]  [<ffffffff81624182>] ? native_safe_halt+0x2/0x10
[263565.416741]  [<ffffffff81623ec8>] ? default_idle+0x18/0xd0
[263565.422314]  [<ffffffff810a7a46>] ? cpu_startup_entry+0x126/0x220
[263565.428492]  [<ffffffff8104c261>] ? start_secondary+0x161/0x180
[263565.434496] Code: 0e 00 00 00 53 89 d3 49 89 cc 4c 89 c5 45 89 ce e8 
bb 8a fc ff 66 83 fb 01 48 89 c6 74 44 66 83 fb 04 74 3e 66 c1 c3 08 48 
85 ed <66> 89 58 0c 74 40 8b 45 00 4d 85 e4 89 46 06 0f b7 45 04 66 89
[263565.454534] RIP  [<ffffffff8158e48b>] eth_header+0x3b/0xc0
[263565.460124]  RSP <ffff88007fa83c58>
[263565.463696] CR2: ffff88015a4d2dd4
[263565.467104] ---[ end trace a1bcaf3618724adf ]---
[263565.471807] Kernel panic - not syncing: Fatal exception in interrupt
[263565.478245] Kernel Offset: disabled
[263565.481818] Rebooting in 5 seconds..


^ permalink raw reply

* [PATCH v2] bpf: test_maps: Avoid possible out of bound access
From: Breno Leitao @ 2019-02-05 17:12 UTC (permalink / raw)
  To: netdev; +Cc: daniel, ast, davem, Breno Leitao

When compiling the test_maps selftest with GCC-8, the compiler warns that
an array might be indexed with a negative value, which could cause an
out-of-bounds access below the start of the array, depending on the
parameters of the function. This is the GCC-8 warning:

	gcc -Wall -O2 -I../../../include/uapi -I../../../lib -I../../../lib/bpf -I../../../../include/generated -DHAVE_GENHDR -I../../../include    test_maps.c /home/breno/Devel/linux/tools/testing/selftests/bpf/libbpf.a -lcap -lelf -lrt -lpthread -o /home/breno/Devel/linux/tools/testing/selftests/bpf/test_maps
	In file included from test_maps.c:16:
	test_maps.c: In function ‘run_all_tests’:
	test_maps.c:1079:10: warning: array subscript -1 is below array bounds of ‘pid_t[<Ube20> + 1]’ [-Warray-bounds]
	   assert(waitpid(pid[i], &status, 0) == pid[i]);
		  ^~~~~~~~~~~~~~~~~~~~~~~~~~~
	test_maps.c:1059:6: warning: array subscript -1 is below array bounds of ‘pid_t[<Ube20> + 1]’ [-Warray-bounds]
	   pid[i] = fork();
	   ~~~^~~

This patch simply guarantees that the task(s) variables are unsigned, so
they can never be negative, which avoids the out-of-bounds access
warning.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 tools/testing/selftests/bpf/test_maps.c | 27 +++++++++++++------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index e2b9eee37187..6e05a22b346c 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -43,7 +43,7 @@ static int map_flags;
 	}								\
 })
 
-static void test_hashmap(int task, void *data)
+static void test_hashmap(unsigned int task, void *data)
 {
 	long long key, next_key, first_key, value;
 	int fd;
@@ -133,7 +133,7 @@ static void test_hashmap(int task, void *data)
 	close(fd);
 }
 
-static void test_hashmap_sizes(int task, void *data)
+static void test_hashmap_sizes(unsigned int task, void *data)
 {
 	int fd, i, j;
 
@@ -153,7 +153,7 @@ static void test_hashmap_sizes(int task, void *data)
 		}
 }
 
-static void test_hashmap_percpu(int task, void *data)
+static void test_hashmap_percpu(unsigned int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	BPF_DECLARE_PERCPU(long, value);
@@ -280,7 +280,7 @@ static int helper_fill_hashmap(int max_entries)
 	return fd;
 }
 
-static void test_hashmap_walk(int task, void *data)
+static void test_hashmap_walk(unsigned int task, void *data)
 {
 	int fd, i, max_entries = 1000;
 	long long key, value, next_key;
@@ -351,7 +351,7 @@ static void test_hashmap_zero_seed(void)
 	close(second);
 }
 
-static void test_arraymap(int task, void *data)
+static void test_arraymap(unsigned int task, void *data)
 {
 	int key, next_key, fd;
 	long long value;
@@ -406,7 +406,7 @@ static void test_arraymap(int task, void *data)
 	close(fd);
 }
 
-static void test_arraymap_percpu(int task, void *data)
+static void test_arraymap_percpu(unsigned int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	BPF_DECLARE_PERCPU(long, values);
@@ -502,7 +502,7 @@ static void test_arraymap_percpu_many_keys(void)
 	close(fd);
 }
 
-static void test_devmap(int task, void *data)
+static void test_devmap(unsigned int task, void *data)
 {
 	int fd;
 	__u32 key, value;
@@ -517,7 +517,7 @@ static void test_devmap(int task, void *data)
 	close(fd);
 }
 
-static void test_queuemap(int task, void *data)
+static void test_queuemap(unsigned int task, void *data)
 {
 	const int MAP_SIZE = 32;
 	__u32 vals[MAP_SIZE + MAP_SIZE/2], val;
@@ -575,7 +575,7 @@ static void test_queuemap(int task, void *data)
 	close(fd);
 }
 
-static void test_stackmap(int task, void *data)
+static void test_stackmap(unsigned int task, void *data)
 {
 	const int MAP_SIZE = 32;
 	__u32 vals[MAP_SIZE + MAP_SIZE/2], val;
@@ -641,7 +641,7 @@ static void test_stackmap(int task, void *data)
 #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
 #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
 #define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
-static void test_sockmap(int tasks, void *data)
+static void test_sockmap(unsigned int tasks, void *data)
 {
 	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
 	int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
@@ -1258,10 +1258,11 @@ static void test_map_large(void)
 }
 
 #define run_parallel(N, FN, DATA) \
-	printf("Fork %d tasks to '" #FN "'\n", N); \
+	printf("Fork %u tasks to '" #FN "'\n", N); \
 	__run_parallel(N, FN, DATA)
 
-static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+static void __run_parallel(unsigned int tasks,
+			   void (*fn)(unsigned int task, void *data),
 			   void *data)
 {
 	pid_t pid[tasks];
@@ -1302,7 +1303,7 @@ static void test_map_stress(void)
 #define DO_UPDATE 1
 #define DO_DELETE 0
 
-static void test_update_delete(int fn, void *data)
+static void test_update_delete(unsigned int fn, void *data)
 {
 	int do_update = ((int *)data)[1];
 	int fd = ((int *)data)[0];
-- 
2.19.0


^ permalink raw reply related

* Re: [B.A.T.M.A.N.] [RFC v4 00/19] batman-adv: netlink restructuring, part 2
From: Simon Wunderlich @ 2019-02-05 17:04 UTC (permalink / raw)
  To: b.a.t.m.a.n; +Cc: Sven Eckelmann, netdev, Jiri Pirko
In-Reply-To: <1895931.G10psR3j26@sven-edge>

[-- Attachment #1: Type: text/plain, Size: 1804 bytes --]

On Saturday, January 26, 2019 11:47:20 AM CET Sven Eckelmann wrote:
> Aggregated OGM is currently defined as:
> 
> 
> * according to batctl manpage:
> 
>     aggregation|ag [0|1]
>            If no parameter is given the current aggregation setting
>            is displayed. Otherwise the parameter is used to enable or
>            disable OGM packet aggregation.
> 
> * according to sysfs ABI:
> 
>     What:           /sys/class/net/<mesh_iface>/mesh/aggregated_ogms
>     Date:           May 2010
>     Contact:        Marek Lindner <mareklindner@neomailbox.ch>
>     Description:
>                     Indicates whether the batman protocol messages of the
>                     mesh <mesh_iface> shall be aggregated or not.
> 
> So sysfs is only one possible backend for the batctl command. There is 
> currently nothing which I would assume to be aggregatable beside OGMs but
> let  us assume for now that there is now something and some way to
> aggregate things beside OGMs in a safe and backward compatible way. Let's
> call this FOO - so we have BATADV_ATTR_AGGREGATION_OGM_ENABLED and
> BATADV_ATTR_AGGREGATION_FOO_ENABLED. Or we have BATADV_ATTR_AGGREGATION as
> an  u32 and just use the second bit as marker for FOO (and of course the
> first bit as marker for OGM).
> 
> Would it now be more preferable to use BATADV_ATTR_AGGREGATION_OGM_ENABLED
> as  u8 (boolean) or to switch to BATADV_ATTR_AGGREGATION (u32) & assign
> single bits to packet types.

I'd prefer BATADV_ATTR_AGGREGATION_OGM_ENABLED (as we have your patchset now). 
Although it may be technically possible to aggregate other things (e.g. 
broadcasts), I don't think this will be implemented anytime soon, if at all. 
And if we do, we can just make another BATADV_ATTR_AGGREGATION_FOO_ENABLED 
flag.

Cheers,
       Simon

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: Kernel panic in eth_header
From: Eric Dumazet @ 2019-02-05 16:57 UTC (permalink / raw)
  To: Andrew, Netdev
In-Reply-To: <18c17dde-5963-4412-2e98-ba44953f0ddd@seti.kr.ua>



On 02/05/2019 08:29 AM, Andrew wrote:
> Hi all.
> 
> After upgrade on PPPoE BRAS to kernel 4.9.153 I've got a kernel panic after 3 days of uptime.
> 
> Unfortunately kernel is compiled w/o debug info; I rebuilt kernel with debug info enabled (kernel is compiled with same function addresses - I compare vmlinux symbol maps) - it says that panic is in net/ethernet/eth.c:88
> 
> Below there is a kernel panic trace. igb is from vendor, ver. 5.3.5.4. What extra info is needed?
> 
> [263565.106441] BUG: unable to handle kernel paging request at ffff88015a4d2dd4
> [263565.113527] IP: [<ffffffff8158e48b>] eth_header+0x3b/0xc0
> [263565.119030] PGD 1e8f067 [263565.121474] PUD 0
> [263565.123580]
> [263565.125166] Oops: 0002 [#1] SMP
> [263565.128398] Modules linked in: xt_nat iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 iptable_filter xt_length xt_TCPMSS xt_tcpudp xt_mark xt_dscp iptable_mangle ip_tables x_tables nf_nat_pptp nf_conntrack_pptp nf_conntrack_proto_gre nf_nat_proto_gre nf_nat nf_conntrack sch_sfq sch_htb cls_u32 sch_ingress sch_prio sch_tbf cls_flow cls_fw act_police ifb 8021q mrp garp stp llc softdog pppoe pppox ppp_generic slhc i2c_nforce2 i2c_core igb(O) parport_pc dca parport thermal asus_atk0110 fan ptp k10temp hwmon pps_core nv_tco
> [263565.176083] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G           O    4.9.153-x86_64 #1
> [263565.183996] Hardware name: System manufacturer System Product Name/M2N-E, BIOS ASUS M2N-E ACPI BIOS Revision 5001 03/23/2010
> [263565.195289] task: ffff88007d0f5200 task.stack: ffffc9000006c000
> [263565.201295] RIP: 0010:[<ffffffff8158e48b>] [<ffffffff8158e48b>] eth_header+0x3b/0xc0
> [263565.209225] RSP: 0018:ffff88007fa83c58  EFLAGS: 00010286
> [263565.214622] RAX: ffff88015a4d2dc8 RBX: 0000000000000008 RCX: ffff8800682434a0
> [263565.221843] RDX: ffff88015a4d2dc8 RSI: ffff88015a4d2dc8 RDI: ffff880077aab000
> [263565.229062] RBP: ffff88007b663d90 R08: ffff88007b663d90 R09: 0000000000000574
> [263565.236281] R10: ffff88007d1fa000 R11: 0000000000000000 R12: ffff8800682434a0
> [263565.243501] R13: ffff88007d1fa000 R14: 0000000000000574 R15: 0000000000000008
> [263565.250719] FS:  0000000000000000(0000) GS:ffff88007fa80000(0000) knlGS:0000000000000000
> [263565.258894] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [263565.264725] CR2: ffff88015a4d2dd4 CR3: 000000007ad73000 CR4: 00000000000006f0
> [263565.271944] Stack:
> [263565.274041]  ffff880077aab000 ffff880068243400 ffff88007a745000 ffff8800682434a0
> [263565.281582]  0000000000000002 ffffffff81571d09 ffff880068243400 ffff88007fa83d00
> [263565.289121]  ffff88007a745000 ffff880077aab000 ffff88007a712000 ffffffff815a8c61
> [263565.296661] Call Trace:
> [263565.299193]  <IRQ> [263565.301205] [<ffffffff81571d09>] ? neigh_connected_output+0xa9/0x100
> [263565.307740]  [<ffffffff815a8c61>] ? ip_finish_output2+0x221/0x400
> [263565.313920]  [<ffffffff8159e144>] ? nf_iterate+0x54/0x60
> [263565.319319]  [<ffffffff815ab2fa>] ? ip_output+0x6a/0xf0
> [263565.324631]  [<ffffffff8159e102>] ? nf_iterate+0x12/0x60
> [263565.330030]  [<ffffffff815aa6e0>] ? ip_fragment.constprop.5+0x80/0x80
> [263565.336556]  [<ffffffff815a73b6>] ? ip_forward+0x396/0x480
> [263565.342128]  [<ffffffff815a6fb0>] ? ip_check_defrag+0x1e0/0x1e0
> [263565.348134]  [<ffffffff815a5a2e>] ? ip_rcv+0x2ae/0x370
> [263565.353361]  [<ffffffffa0107c02>] ? pppoe_rcv_core+0xd2/0x160 [pppoe]
> [263565.359888]  [<ffffffff815a5170>] ? ip_local_deliver_finish+0x1d0/0x1d0
> [263565.366586]  [<ffffffff81562a57>] ? __netif_receive_skb_core+0x527/0xa80
> [263565.373373]  [<ffffffff81567632>] ? process_backlog+0x92/0x130
> [263565.379291]  [<ffffffff8156745d>] ? net_rx_action+0x24d/0x390
> [263565.385124]  [<ffffffff81628374>] ? __do_softirq+0xf4/0x2a0
> [263565.390784]  [<ffffffff8107136c>] ? irq_exit+0xbc/0xd0
> [263565.396008]  [<ffffffff81626cd6>] ? call_function_single_interrupt+0x96/0xa0
> [263565.403141]  <EOI> [263565.405153] [<ffffffff81623eb0>] ? __sched_text_end+0x2/0x2
> [263565.410907]  [<ffffffff81624182>] ? native_safe_halt+0x2/0x10
> [263565.416741]  [<ffffffff81623ec8>] ? default_idle+0x18/0xd0
> [263565.422314]  [<ffffffff810a7a46>] ? cpu_startup_entry+0x126/0x220
> [263565.428492]  [<ffffffff8104c261>] ? start_secondary+0x161/0x180
> [263565.434496] Code: 0e 00 00 00 53 89 d3 49 89 cc 4c 89 c5 45 89 ce e8 bb 8a fc ff 66 83 fb 01 48 89 c6 74 44 66 83 fb 04 74 3e 66 c1 c3 08 48 85 ed <66> 89 58 0c 74 40 8b 45 00 4d 85 e4 89 46 06 0f b7 45 04 66 89
> [263565.454534] RIP  [<ffffffff8158e48b>] eth_header+0x3b/0xc0
> [263565.460124]  RSP <ffff88007fa83c58>
> [263565.463696] CR2: ffff88015a4d2dd4
> [263565.467104] ---[ end trace a1bcaf3618724adf ]---
> [263565.471807] Kernel panic - not syncing: Fatal exception in interrupt
> [263565.478245] Kernel Offset: disabled
> [263565.481818] Rebooting in 5 seconds..
> 


This is a well known issue, a fix should come shortly in stable branches

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f8bbd693c19c247e41839c2d0b5318ca51b23ee8..d95b32af4a0e3f552405c9e61cc372729834160c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -425,6 +425,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
         * fragment.
         */
 
+       err = -EINVAL;
        /* Find out where to put this fragment.  */
        prev_tail = qp->q.fragments_tail;
        if (!prev_tail)
@@ -501,7 +502,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 discard_qp:
        inet_frag_kill(&qp->q);
-       err = -EINVAL;
        __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
 err:
        kfree_skb(skb);




^ permalink raw reply related

* Re: [PATCH net-next v7 0/8] devlink: Add configuration parameters support for devlink_port
From: Michal Kubecek @ 2019-02-05 16:51 UTC (permalink / raw)
  To: Vasundhara Volam
  Cc: Jakub Kicinski, Netdev, David Miller, michael.chan@broadcom.com,
	Jiri Pirko
In-Reply-To: <CAACQVJqPdrDTrFwZPt+XaGEf3-H81EWELR_SvZZG83mF+54MsQ@mail.gmail.com>

On Tue, Feb 05, 2019 at 09:53:26AM +0530, Vasundhara Volam wrote:
> On Tue, Feb 5, 2019 at 8:26 AM Jakub Kicinski
> >
> > No?  We were talking about using the soon-to-come ethtool netlink
> > API with additional indication that given configuration request is
> > supposed to be persisted.  Adding more devlink parameters is exactly
> > the opposite of what you should be doing.
> 
> Okay. So, till then can we have the devlink wake_on_lan parameter or
> you want this to be removed? Could you please clarify?
> 
> Once ethtool netlink API is available with persisted support, I can remove
> this wake_on_lan parameter from devlink. Thanks.

Once you provide an interface for userspace and applications start using
it, it's hard to get rid of it. As an extreme example, the legacy ioctl
interface used by ifconfig has been declared obsolete since kernel 2.2.0
(January 1999, i.e. 20 years ago) and we still have to maintain it.

Michal Kubecek

^ permalink raw reply

* Re: [PATCH] net: dsa: Fix lockdep false positive splat
From: Vivien Didelot @ 2019-02-05 16:35 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: netdev, linux-kernel, Andrew Lunn, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190202175329.5969-1-marc.zyngier@arm.com>

On Sat,  2 Feb 2019 17:53:29 +0000, Marc Zyngier <marc.zyngier@arm.com> wrote:
> Creating a macvtap on a DSA-backed interface results in the following
> splat when lockdep is enabled:
> 
> [   19.638080] IPv6: ADDRCONF(NETDEV_CHANGE): lan0: link becomes ready
> [   23.041198] device lan0 entered promiscuous mode
> [   23.043445] device eth0 entered promiscuous mode
> [   23.049255]
> [   23.049557] ============================================
> [   23.055021] WARNING: possible recursive locking detected
> [   23.060490] 5.0.0-rc3-00013-g56c857a1b8d3 #118 Not tainted
> [   23.066132] --------------------------------------------
> [   23.071598] ip/2861 is trying to acquire lock:
> [   23.076171] 00000000f61990cb (_xmit_ETHER){+...}, at: dev_set_rx_mode+0x1c/0x38
> [   23.083693]
> [   23.083693] but task is already holding lock:
> [   23.089696] 00000000ecf0c3b4 (_xmit_ETHER){+...}, at: dev_uc_add+0x24/0x70
> [   23.096774]
> [   23.096774] other info that might help us debug this:
> [   23.103494]  Possible unsafe locking scenario:
> [   23.103494]
> [   23.109584]        CPU0
> [   23.112093]        ----
> [   23.114601]   lock(_xmit_ETHER);
> [   23.117917]   lock(_xmit_ETHER);
> [   23.121233]
> [   23.121233]  *** DEADLOCK ***
> [   23.121233]
> [   23.127325]  May be due to missing lock nesting notation
> [   23.127325]
> [   23.134315] 2 locks held by ip/2861:
> [   23.137987]  #0: 000000003b766c72 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x338/0x4e0
> [   23.146231]  #1: 00000000ecf0c3b4 (_xmit_ETHER){+...}, at: dev_uc_add+0x24/0x70
> [   23.153757]
> [   23.153757] stack backtrace:
> [   23.158243] CPU: 0 PID: 2861 Comm: ip Not tainted 5.0.0-rc3-00013-g56c857a1b8d3 #118
> [   23.166212] Hardware name: Globalscale Marvell ESPRESSOBin Board (DT)
> [   23.172843] Call trace:
> [   23.175358]  dump_backtrace+0x0/0x188
> [   23.179116]  show_stack+0x14/0x20
> [   23.182524]  dump_stack+0xb4/0xec
> [   23.185928]  __lock_acquire+0x123c/0x1860
> [   23.190048]  lock_acquire+0xc8/0x248
> [   23.193724]  _raw_spin_lock_bh+0x40/0x58
> [   23.197755]  dev_set_rx_mode+0x1c/0x38
> [   23.201607]  dev_set_promiscuity+0x3c/0x50
> [   23.205820]  dsa_slave_change_rx_flags+0x5c/0x70
> [   23.210567]  __dev_set_promiscuity+0x148/0x1e0
> [   23.215136]  __dev_set_rx_mode+0x74/0x98
> [   23.219167]  dev_uc_add+0x54/0x70
> [   23.222575]  macvlan_open+0x170/0x1d0
> [   23.226336]  __dev_open+0xe0/0x160
> [   23.229830]  __dev_change_flags+0x16c/0x1b8
> [   23.234132]  dev_change_flags+0x20/0x60
> [   23.238074]  do_setlink+0x2d0/0xc50
> [   23.241658]  __rtnl_newlink+0x5f8/0x6e8
> [   23.245601]  rtnl_newlink+0x50/0x78
> [   23.249184]  rtnetlink_rcv_msg+0x360/0x4e0
> [   23.253397]  netlink_rcv_skb+0xe8/0x130
> [   23.257338]  rtnetlink_rcv+0x14/0x20
> [   23.261012]  netlink_unicast+0x190/0x210
> [   23.265043]  netlink_sendmsg+0x288/0x350
> [   23.269075]  sock_sendmsg+0x18/0x30
> [   23.272659]  ___sys_sendmsg+0x29c/0x2c8
> [   23.276602]  __sys_sendmsg+0x60/0xb8
> [   23.280276]  __arm64_sys_sendmsg+0x1c/0x28
> [   23.284488]  el0_svc_common+0xd8/0x138
> [   23.288340]  el0_svc_handler+0x24/0x80
> [   23.292192]  el0_svc+0x8/0xc
> 
> This looks fairly harmless (no actual deadlock occurs), and is
> fixed in a similar way to c6894dec8ea9 ("bridge: fix lockdep
> addr_list_lock false positive splat") by putting the addr_list_lock
> in its own lockdep class.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>

^ permalink raw reply

* Kernel panic in eth_header
From: Andrew @ 2019-02-05 16:29 UTC (permalink / raw)
  To: Netdev

Hi all.

After upgrade on PPPoE BRAS to kernel 4.9.153 I've got a kernel panic 
after 3 days of uptime.

Unfortunately kernel is compiled w/o debug info; I rebuilt kernel with 
debug info enabled (kernel is compiled with same function addresses - I 
compare vmlinux symbol maps) - it says that panic is in 
net/ethernet/eth.c:88

Below there is a kernel panic trace. igb is from vendor, ver. 5.3.5.4. 
What extra info is needed?

[263565.106441] BUG: unable to handle kernel paging request at 
ffff88015a4d2dd4
[263565.113527] IP: [<ffffffff8158e48b>] eth_header+0x3b/0xc0
[263565.119030] PGD 1e8f067 [263565.121474] PUD 0
[263565.123580]
[263565.125166] Oops: 0002 [#1] SMP
[263565.128398] Modules linked in: xt_nat iptable_nat nf_conntrack_ipv4 
nf_defrag_ipv4 nf_nat_ipv4 iptable_filter xt_length xt_TCPMSS xt_tcpudp 
xt_mark xt_dscp iptable_mangle ip_tables x_tables nf_nat_pptp 
nf_conntrack_pptp nf_conntrack_proto_gre nf_nat_proto_gre nf_nat 
nf_conntrack sch_sfq sch_htb cls_u32 sch_ingress sch_prio sch_tbf 
cls_flow cls_fw act_police ifb 8021q mrp garp stp llc softdog pppoe 
pppox ppp_generic slhc i2c_nforce2 i2c_core igb(O) parport_pc dca 
parport thermal asus_atk0110 fan ptp k10temp hwmon pps_core nv_tco
[263565.176083] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G           O    
4.9.153-x86_64 #1
[263565.183996] Hardware name: System manufacturer System Product 
Name/M2N-E, BIOS ASUS M2N-E ACPI BIOS Revision 5001 03/23/2010
[263565.195289] task: ffff88007d0f5200 task.stack: ffffc9000006c000
[263565.201295] RIP: 0010:[<ffffffff8158e48b>] [<ffffffff8158e48b>] 
eth_header+0x3b/0xc0
[263565.209225] RSP: 0018:ffff88007fa83c58  EFLAGS: 00010286
[263565.214622] RAX: ffff88015a4d2dc8 RBX: 0000000000000008 RCX: 
ffff8800682434a0
[263565.221843] RDX: ffff88015a4d2dc8 RSI: ffff88015a4d2dc8 RDI: 
ffff880077aab000
[263565.229062] RBP: ffff88007b663d90 R08: ffff88007b663d90 R09: 
0000000000000574
[263565.236281] R10: ffff88007d1fa000 R11: 0000000000000000 R12: 
ffff8800682434a0
[263565.243501] R13: ffff88007d1fa000 R14: 0000000000000574 R15: 
0000000000000008
[263565.250719] FS:  0000000000000000(0000) GS:ffff88007fa80000(0000) 
knlGS:0000000000000000
[263565.258894] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[263565.264725] CR2: ffff88015a4d2dd4 CR3: 000000007ad73000 CR4: 
00000000000006f0
[263565.271944] Stack:
[263565.274041]  ffff880077aab000 ffff880068243400 ffff88007a745000 
ffff8800682434a0
[263565.281582]  0000000000000002 ffffffff81571d09 ffff880068243400 
ffff88007fa83d00
[263565.289121]  ffff88007a745000 ffff880077aab000 ffff88007a712000 
ffffffff815a8c61
[263565.296661] Call Trace:
[263565.299193]  <IRQ> [263565.301205] [<ffffffff81571d09>] ? 
neigh_connected_output+0xa9/0x100
[263565.307740]  [<ffffffff815a8c61>] ? ip_finish_output2+0x221/0x400
[263565.313920]  [<ffffffff8159e144>] ? nf_iterate+0x54/0x60
[263565.319319]  [<ffffffff815ab2fa>] ? ip_output+0x6a/0xf0
[263565.324631]  [<ffffffff8159e102>] ? nf_iterate+0x12/0x60
[263565.330030]  [<ffffffff815aa6e0>] ? ip_fragment.constprop.5+0x80/0x80
[263565.336556]  [<ffffffff815a73b6>] ? ip_forward+0x396/0x480
[263565.342128]  [<ffffffff815a6fb0>] ? ip_check_defrag+0x1e0/0x1e0
[263565.348134]  [<ffffffff815a5a2e>] ? ip_rcv+0x2ae/0x370
[263565.353361]  [<ffffffffa0107c02>] ? pppoe_rcv_core+0xd2/0x160 [pppoe]
[263565.359888]  [<ffffffff815a5170>] ? ip_local_deliver_finish+0x1d0/0x1d0
[263565.366586]  [<ffffffff81562a57>] ? __netif_receive_skb_core+0x527/0xa80
[263565.373373]  [<ffffffff81567632>] ? process_backlog+0x92/0x130
[263565.379291]  [<ffffffff8156745d>] ? net_rx_action+0x24d/0x390
[263565.385124]  [<ffffffff81628374>] ? __do_softirq+0xf4/0x2a0
[263565.390784]  [<ffffffff8107136c>] ? irq_exit+0xbc/0xd0
[263565.396008]  [<ffffffff81626cd6>] ? 
call_function_single_interrupt+0x96/0xa0
[263565.403141]  <EOI> [263565.405153] [<ffffffff81623eb0>] ? 
__sched_text_end+0x2/0x2
[263565.410907]  [<ffffffff81624182>] ? native_safe_halt+0x2/0x10
[263565.416741]  [<ffffffff81623ec8>] ? default_idle+0x18/0xd0
[263565.422314]  [<ffffffff810a7a46>] ? cpu_startup_entry+0x126/0x220
[263565.428492]  [<ffffffff8104c261>] ? start_secondary+0x161/0x180
[263565.434496] Code: 0e 00 00 00 53 89 d3 49 89 cc 4c 89 c5 45 89 ce e8 
bb 8a fc ff 66 83 fb 01 48 89 c6 74 44 66 83 fb 04 74 3e 66 c1 c3 08 48 
85 ed <66> 89 58 0c 74 40 8b 45 00 4d 85 e4 89 46 06 0f b7 45 04 66 89
[263565.454534] RIP  [<ffffffff8158e48b>] eth_header+0x3b/0xc0
[263565.460124]  RSP <ffff88007fa83c58>
[263565.463696] CR2: ffff88015a4d2dd4
[263565.467104] ---[ end trace a1bcaf3618724adf ]---
[263565.471807] Kernel panic - not syncing: Fatal exception in interrupt
[263565.478245] Kernel Offset: disabled
[263565.481818] Rebooting in 5 seconds..


^ permalink raw reply

* Re: [PATCH net-next v3] net: dsa: mv88e6xxx: Prevent suspend to RAM
From: Vivien Didelot @ 2019-02-05 16:28 UTC (permalink / raw)
  To: Miquel Raynal
  Cc: Andrew Lunn, Florian Fainelli, David S. Miller, netdev,
	linux-kernel, Thomas Petazzoni, Gregory Clement, Antoine Tenart,
	Maxime Chevallier, Nadav Haklai, Miquel Raynal
In-Reply-To: <20190205110728.11451-1-miquel.raynal@bootlin.com>

Hi Miquel,

On Tue,  5 Feb 2019 12:07:28 +0100, Miquel Raynal <miquel.raynal@bootlin.com> wrote:

> +/* There is no suspend to RAM support at DSA level yet, the switch configuration
> + * would be lost after a power cycle so prevent it to be suspended.
> + */
> +static int __maybe_unused mv88e6xxx_suspend(struct device *dev)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static int __maybe_unused mv88e6xxx_resume(struct device *dev)
> +{
> +	return 0;
> +}

The code looks good but my only concern is -EOPNOTSUPP. In this
context this code is specific to callbacks targeting bridge and
switchdev, while the dev_pm_ops are completely parallel to DSA.

It is intuitive but given Documentation/power/runtime_pm.txt, this
will default to being interpreted as a fatal error, while -EBUSY
seems to keep the device in an 'active' state in a saner way.

I don't understand yet how to properly tell PM core that suspend to RAM
isn't supported. If an error code different from -EAGAIN or -EBUSY
is the way to go, I'm good with it:

Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>

Thanks,

	Vivien

^ permalink raw reply

* [PATCH net] net: broadcom: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in sbdma_tx_process() when
skb xmit is done. It makes drop profiles (dropwatch, perf) more
friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/ethernet/broadcom/sb1250-mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 5db9f41..134ae28 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -1288,7 +1288,7 @@ static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d,
 		 * for transmits, we just free buffers.
 		 */
 
-		dev_kfree_skb_irq(sb);
+		dev_consume_skb_irq(sb);
 
 		/*
 		 * .. and advance to the next buffer.
-- 
2.7.4



^ permalink raw reply related

* [PATCH net] net: via-velocity: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:22 UTC (permalink / raw)
  To: netdev; +Cc: romieu, davem, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in velocity_free_tx_buf()
when skb xmit is done. It makes drop profiles (dropwatch, perf) more
friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/ethernet/via/via-velocity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 8241269..27f6cf1 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -1740,7 +1740,7 @@ static void velocity_free_tx_buf(struct velocity_info *vptr,
 		dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
 				 le16_to_cpu(pktlen), DMA_TO_DEVICE);
 	}
-	dev_kfree_skb_irq(skb);
+	dev_consume_skb_irq(skb);
 	tdinfo->skb = NULL;
 }
 
-- 
2.7.4



^ permalink raw reply related

* [PATCH net] net: tehuti: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:21 UTC (permalink / raw)
  To: netdev; +Cc: andy, davem, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in bdx_tx_cleanup() when skb
xmit is done. It makes drop profiles (dropwatch, perf) more friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/ethernet/tehuti/tehuti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index dc966ddb..b24c111 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -1739,7 +1739,7 @@ static void bdx_tx_cleanup(struct bdx_priv *priv)
 		tx_level -= db->rptr->len;	/* '-' koz len is negative */
 
 		/* now should come skb pointer - free it */
-		dev_kfree_skb_irq(db->rptr->addr.skb);
+		dev_consume_skb_irq(db->rptr->addr.skb);
 		bdx_tx_db_inc_rptr(db);
 	}
 
-- 
2.7.4



^ permalink raw reply related

* [PATCH net] net: sun: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:19 UTC (permalink / raw)
  To: netdev; +Cc: davem, yanjun.zhu, shannon.nelson, robh, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called when skb xmit is done. It makes
drop profiles (dropwatch, perf) more friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/ethernet/sun/cassini.c | 2 +-
 drivers/net/ethernet/sun/sunbmac.c | 2 +-
 drivers/net/ethernet/sun/sunhme.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 7ec4eb7..6fc05c1 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1898,7 +1898,7 @@ static inline void cas_tx_ringN(struct cas *cp, int ring, int limit)
 		cp->net_stats[ring].tx_packets++;
 		cp->net_stats[ring].tx_bytes += skb->len;
 		spin_unlock(&cp->stat_lock[ring]);
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 	}
 	cp->tx_old[ring] = entry;
 
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 720b7ac..e9b757b 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -781,7 +781,7 @@ static void bigmac_tx(struct bigmac *bp)
 
 		DTX(("skb(%p) ", skb));
 		bp->tx_skbs[elem] = NULL;
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 
 		elem = NEXT_TX(elem);
 	}
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index ff641cf..d007dfe 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -1962,7 +1962,7 @@ static void happy_meal_tx(struct happy_meal *hp)
 			this = &txbase[elem];
 		}
 
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		dev->stats.tx_packets++;
 	}
 	hp->tx_old = elem;
-- 
2.7.4



^ permalink raw reply related

* [PATCH net] net: fsl_ucc_hdlc: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:14 UTC (permalink / raw)
  To: netdev, linuxppc-dev; +Cc: qiang.zhao, davem, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in hdlc_tx_done() when skb
xmit is done. It makes drop profiles (dropwatch, perf) more friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/wan/fsl_ucc_hdlc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 66d889d..a08f04c 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -482,7 +482,7 @@ static int hdlc_tx_done(struct ucc_hdlc_private *priv)
 		memset(priv->tx_buffer +
 		       (be32_to_cpu(bd->buf) - priv->dma_tx_addr),
 		       0, skb->len);
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 
 		priv->tx_skbuff[priv->skb_dirtytx] = NULL;
 		priv->skb_dirtytx =
-- 
2.7.4



^ permalink raw reply related

* [PATCH net] net: fec_mpc52xx: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in mpc52xx_fec_tx_interrupt()
when skb xmit is done. It makes drop profiles (dropwatch, perf) more
friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/ethernet/freescale/fec_mpc52xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index b90bab7..c1968b3 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -369,7 +369,7 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id)
 		dma_unmap_single(dev->dev.parent, bd->skb_pa, skb->len,
 				 DMA_TO_DEVICE);
 
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 	}
 	spin_unlock(&priv->lock);
 
-- 
2.7.4



^ permalink raw reply related

* Need to retouch your photos?
From: Stacy @ 2019-02-04 11:55 UTC (permalink / raw)
  To: netdev

Need to retouch your photos?  Deep etching or masking for your photos?

We are the studio who can do those service for your photos.

Please send photos to start

Thanks,
Stacy

Ulmd

Bietigheim

^ permalink raw reply

* Need to retouch your photos?
From: Stacy @ 2019-02-04  9:58 UTC (permalink / raw)
  To: netdev

Need to retouch your photos?  Deep etching or masking for your photos?

We are the studio who can do those service for your photos.

Please send photos to start

Thanks,
Stacy

Redmscheid

Ansbadch

^ permalink raw reply

* [PATCH net] net: smsc: epic100: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:09 UTC (permalink / raw)
  To: netdev; +Cc: davem, colin.king, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in epic_tx() when skb xmit
is done. It makes drop profiles (dropwatch, perf) more friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/ethernet/smsc/epic100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index 15c62c1..be47d86 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -1037,7 +1037,7 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep)
 		skb = ep->tx_skbuff[entry];
 		pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr,
 				 skb->len, PCI_DMA_TODEVICE);
-		dev_kfree_skb_irq(skb);
+		dev_consume_skb_irq(skb);
 		ep->tx_skbuff[entry] = NULL;
 	}
 
-- 
2.7.4



^ permalink raw reply related

* [PATCH net] net: dscc4: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-05 16:07 UTC (permalink / raw)
  To: netdev; +Cc: romieu, davem, yang.wei9, albin_yang

From: Yang Wei <yang.wei9@zte.com.cn>

dev_consume_skb_irq() should be called in dscc4_tx_irq() when skb
xmit is done. It makes drop profiles (dropwatch, perf) more friendly.

Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
 drivers/net/wan/dscc4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index c0b0f52..27decf8 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -1575,7 +1575,7 @@ static void dscc4_tx_irq(struct dscc4_pci_priv *ppriv,
 					dev->stats.tx_packets++;
 					dev->stats.tx_bytes += skb->len;
 				}
-				dev_kfree_skb_irq(skb);
+				dev_consume_skb_irq(skb);
 				dpriv->tx_skbuff[cur] = NULL;
 				++dpriv->tx_dirty;
 			} else {
-- 
2.7.4



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox