Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next v2 1/4] test_rhashtable: don't allocate huge static array
From: Florian Westphal @ 2017-09-19 23:12 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal
In-Reply-To: <20170919231214.2281-1-fw@strlen.de>

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 lib/test_rhashtable.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 0ffca990a833..c40d6e636f33 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -72,8 +72,6 @@ struct thread_data {
 	struct test_obj *objs;
 };
 
-static struct test_obj array[MAX_ENTRIES];
-
 static struct rhashtable_params test_rht_params = {
 	.head_offset = offsetof(struct test_obj, node),
 	.key_offset = offsetof(struct test_obj, value),
@@ -85,7 +83,7 @@ static struct rhashtable_params test_rht_params = {
 static struct semaphore prestart_sem;
 static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
 
-static int insert_retry(struct rhashtable *ht, struct rhash_head *obj,
+static int insert_retry(struct rhashtable *ht, struct test_obj *obj,
                         const struct rhashtable_params params)
 {
 	int err, retries = -1, enomem_retries = 0;
@@ -93,7 +91,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj,
 	do {
 		retries++;
 		cond_resched();
-		err = rhashtable_insert_fast(ht, obj, params);
+		err = rhashtable_insert_fast(ht, &obj->node, params);
 		if (err == -ENOMEM && enomem_retry) {
 			enomem_retries++;
 			err = -EBUSY;
@@ -107,7 +105,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj,
 	return err ? : retries;
 }
 
-static int __init test_rht_lookup(struct rhashtable *ht)
+static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array)
 {
 	unsigned int i;
 
@@ -186,7 +184,7 @@ static void test_bucket_stats(struct rhashtable *ht)
 		pr_warn("Test failed: Total count mismatch ^^^");
 }
 
-static s64 __init test_rhashtable(struct rhashtable *ht)
+static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array)
 {
 	struct test_obj *obj;
 	int err;
@@ -203,7 +201,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 		struct test_obj *obj = &array[i];
 
 		obj->value.id = i * 2;
-		err = insert_retry(ht, &obj->node, test_rht_params);
+		err = insert_retry(ht, obj, test_rht_params);
 		if (err > 0)
 			insert_retries += err;
 		else if (err)
@@ -216,7 +214,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
 
 	test_bucket_stats(ht);
 	rcu_read_lock();
-	test_rht_lookup(ht);
+	test_rht_lookup(ht, array);
 	rcu_read_unlock();
 
 	test_bucket_stats(ht);
@@ -286,7 +284,7 @@ static int threadfunc(void *data)
 	for (i = 0; i < entries; i++) {
 		tdata->objs[i].value.id = i;
 		tdata->objs[i].value.tid = tdata->id;
-		err = insert_retry(&ht, &tdata->objs[i].node, test_rht_params);
+		err = insert_retry(&ht, &tdata->objs[i], test_rht_params);
 		if (err > 0) {
 			insert_retries += err;
 		} else if (err) {
@@ -349,6 +347,10 @@ static int __init test_rht_init(void)
 	test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries);
 	test_rht_params.nelem_hint = size;
 
+	objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj));
+	if (!objs)
+		return -ENOMEM;
+
 	pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n",
 		size, max_size, shrinking);
 
@@ -356,7 +358,8 @@ static int __init test_rht_init(void)
 		s64 time;
 
 		pr_info("Test %02d:\n", i);
-		memset(&array, 0, sizeof(array));
+		memset(objs, 0, test_rht_params.max_size * sizeof(struct test_obj));
+
 		err = rhashtable_init(&ht, &test_rht_params);
 		if (err < 0) {
 			pr_warn("Test failed: Unable to initialize hashtable: %d\n",
@@ -364,9 +367,10 @@ static int __init test_rht_init(void)
 			continue;
 		}
 
-		time = test_rhashtable(&ht);
+		time = test_rhashtable(&ht, objs);
 		rhashtable_destroy(&ht);
 		if (time < 0) {
+			vfree(objs);
 			pr_warn("Test failed: return code %lld\n", time);
 			return -EINVAL;
 		}
@@ -374,6 +378,7 @@ static int __init test_rht_init(void)
 		total_time += time;
 	}
 
+	vfree(objs);
 	do_div(total_time, runs);
 	pr_info("Average test time: %llu\n", total_time);
 
-- 
2.13.5

^ permalink raw reply related

* [net-next v2 0/4] test_rhashtable: don't allocate huge static array
From: Florian Westphal @ 2017-09-19 23:12 UTC (permalink / raw)
  To: netdev

Add a test case for the rhlist interface.
While at it, cleanup current rhashtable test a bit and add a check
for max_size support.

No changes since v1, except in last patch.
kbuild robot complained about large onstack allocation caused by
struct rhltable when lockdep is enabled.

^ permalink raw reply

* Re: [PATCH net-next v3 00/12] net: dsa: b53/bcm_sf2 cleanups
From: David Miller @ 2017-09-19 23:09 UTC (permalink / raw)
  To: f.fainelli; +Cc: netdev, andrew, vivien.didelot
In-Reply-To: <20170919174654.2122-1-f.fainelli@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 19 Sep 2017 10:46:42 -0700

> This patch series is a first pass set of clean-ups to reduce the number of LOCs
> between b53 and bcm_sf2 and sharing as many functions as possible.
> 
> There is a number of additional cleanups queued up locally that require more
> thorough testing.

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH V2 net 0/7] Bug fixes for the HNS3 Ethernet Driver for Hip08 SoC
From: David Miller @ 2017-09-19 23:07 UTC (permalink / raw)
  To: salil.mehta-hv44wF8Li93QT0dZR+AlfA
  Cc: yisen.zhuang-hv44wF8Li93QT0dZR+AlfA,
	lipeng321-hv44wF8Li93QT0dZR+AlfA,
	mehta.salil.lnk-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <20170919161716.92680-1-salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

From: Salil Mehta <salil.mehta-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Date: Tue, 19 Sep 2017 17:17:09 +0100

> This patch set presents some bug fixes for the HNS3 Ethernet driver identified
> during internal testing & stabilization efforts.
> 
> Change Log:
> Patch V2: Resolved comments from Leon Romanovsky
> Patch V1: Initial Submit

Series applied, thank you.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 0/4] net: dsa: move master ethtool code
From: David Miller @ 2017-09-19 23:04 UTC (permalink / raw)
  To: f.fainelli; +Cc: vivien.didelot, netdev, linux-kernel, kernel, andrew
In-Reply-To: <80b8afa0-e269-c0b7-da4a-dcb604a239ca@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 19 Sep 2017 13:04:56 -0700

> On 09/19/2017 08:56 AM, Vivien Didelot wrote:
>> The DSA core overrides the master device's ethtool_ops structure so that
>> it can inject statistics and such of its dedicated switch CPU port.
>> 
>> This ethtool code is currently called on unnecessary conditions or
>> before the master interface and its switch CPU port get wired up.
>> This patchset fixes this.
>> 
>> Similarly to slave.c where the DSA slave net_device is the entry point
>> of the dsa_slave_* functions, this patchset also isolates the master's
>> ethtool code in a new master.c file, where the DSA master net_device is
>> the entry point of the dsa_master_* functions.
>> 
>> This is a first step towards better control of the master device and
>> support for multiple CPU ports.
> 
> Tested-by: Florian Fainelli <f.fainelli@gmail.com>
> 
> * ethtool -S eth0 -> switch port CPU stats are still correctly overlaid
> * ethtool -s gphy wol g -> both switch port and CPU port correctly
> enable WoL
> * ethtool -i eth0 -> driver still reports correct information

Series applied, thanks everyone.

^ permalink raw reply

* Re: [PATCH] net: ethernet: aquantia: default to no in config
From: vcaputo @ 2017-09-19 23:02 UTC (permalink / raw)
  To: David Miller; +Cc: linux-kernel, netdev
In-Reply-To: <20170919.155231.1474915838737685519.davem@davemloft.net>

On Tue, Sep 19, 2017 at 03:52:31PM -0700, David Miller wrote:
> From: Vito Caputo <vcaputo@pengaru.com>
> Date: Tue, 19 Sep 2017 15:43:15 -0700
> 
> > NET_VENDOR_AQUANTIA was "default y" for some reason, which seems
> > obviously inappropriate.
> 
> It is appropriate.
> 
> We make all vendor guards default to yes.

Thanks for the quick response.

Out of curiosity, what's the rationale for that decision?

^ permalink raw reply

* Re: [PATCH net-next 0/7] net: speedup netns create/delete time
From: David Miller @ 2017-09-19 23:02 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, ebiederm, eric.dumazet
In-Reply-To: <20170918190733.26272-1-edumazet@google.com>

From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Sep 2017 12:07:26 -0700

> When rate of netns creation/deletion is high enough,
> we observe softlockups in cleanup_net() caused by huge list
> of netns and way too many rcu_barrier() calls.
> 
> This patch series does some optimizations in kobject,
> and adds batching to tunnels so that netns dismantles are
> less costly.
> 
> IPv6 addrlabels also get a per netns list, and tcp_metrics
> also benefit from batch flushing.
> 
> This gives me one order of magnitude gain.
> (~50 ms -> ~5 ms for one netns create/delete pair)

I like it.

Please address the feedback about using skb_put_data() and
resubmit.

Thanks.

^ permalink raw reply

* Re: [PATCH,net-next,0/2] Improve code coverage of syzkaller
From: David Miller @ 2017-09-19 23:01 UTC (permalink / raw)
  To: peterpenkov96; +Cc: netdev
In-Reply-To: <20170919073402.2292-1-peterpenkov96@gmail.com>

From: Petar Penkov <peterpenkov96@gmail.com>
Date: Tue, 19 Sep 2017 00:34:00 -0700

> The following patches address this by providing the user(syzkaller)
> with the ability to send via napi_gro_receive() and napi_gro_frags().
> Additionally, syzkaller can specify how many fragments there are and
> how much data per fragment there is. This is done by exploiting the
> convenient structure of iovecs. Finally, this patch series adds
> support for exercising the flow dissector during fuzzing.
> 
> The code path including napi_gro_receive() can be enabled via the
> CONFIG_TUN_NAPI compile-time flag, and can be used by users other than
> syzkaller. The remainder of the changes in this patch series give the
> user significantly more control over packets entering the kernel. To
> avoid potential security vulnerabilities, hide the ability to send
> custom skbs and the flow dissector code paths behind a run-time flag
> IFF_NAPI_FRAGS that is advertised and accepted only if CONFIG_TUN_NAPI
> is enabled.
> 
> The patch series will be followed with changes to packetdrill, where
> these additions to the TUN driver are exercised and demonstrated.
> This will give the ability to write regression tests for specific
> parts of the early networking stack.
> 
> Patch 1/ Add NAPI struct per receive queue, enable NAPI, and use
> 	 napi_gro_receive() 
> Patch 2/ Use NAPI skb and napi_gro_frags(), exercise flow
> 	 dissector, and allow custom skbs.

I'm happy with everything except the TUN_NAPI Kconfig knob
requirement.

Rebuilding something just to test things isn't going to fly very well.

Please make it secure somehow, enable this stuff by default.

Thanks.

^ permalink raw reply

* Re: [PATCH] net: ethernet: aquantia: default to no in config
From: David Miller @ 2017-09-19 22:59 UTC (permalink / raw)
  To: vcaputo; +Cc: linux-kernel, netdev
In-Reply-To: <20170919230249.GB17797@shells.gnugeneration.com>

From: vcaputo@pengaru.com
Date: Tue, 19 Sep 2017 16:02:49 -0700

> Out of curiosity, what's the rationale for that decision?

So that you don't need to know what special vendor knob needs to be
switched in order to even be offered the config knob for the driver
you are interested in.

^ permalink raw reply

* Re: [PATCH] net: ethernet: aquantia: default to no in config
From: David Miller @ 2017-09-19 22:52 UTC (permalink / raw)
  To: vcaputo; +Cc: linux-kernel, netdev
In-Reply-To: <20170919224315.GA17797@shells.gnugeneration.com>

From: Vito Caputo <vcaputo@pengaru.com>
Date: Tue, 19 Sep 2017 15:43:15 -0700

> NET_VENDOR_AQUANTIA was "default y" for some reason, which seems
> obviously inappropriate.

It is appropriate.

We make all vendor guards default to yes.

^ permalink raw reply

* [PATCH net v2] bpf: fix ri->map_owner pointer on bpf_prog_realloc
From: Daniel Borkmann @ 2017-09-19 22:44 UTC (permalink / raw)
  To: davem; +Cc: john.fastabend, ast, netdev, Daniel Borkmann

Commit 109980b894e9 ("bpf: don't select potentially stale
ri->map from buggy xdp progs") passed the pointer to the prog
itself to be loaded into r4 prior to the bpf_redirect_map() helper
call, so that we can store the owner into ri->map_owner out of
the helper.

Issue with that is that the actual address of the prog is still
subject to change when subsequent rewrites occur that require
slow path in bpf_prog_realloc() to alloc more memory, e.g. from
patching inlining helper functions or constant blinding. Thus,
we really need to take prog->aux as the address we're holding,
which also works with prog clones as they share the same aux
object.

Instead of then fetching aux->prog during runtime, which could
potentially incur cache misses due to false sharing, we are
going to just use aux for comparison on the map owner. This
will also keep the patchlet of the same size, and later check
in xdp_map_invalid() only accesses read-only aux pointer from
the prog, it's also in the same cacheline already from prior
access when calling bpf_func.

Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 v1->v2:
  - Decided to go with prog->aux instead.

 kernel/bpf/verifier.c |  7 ++++++-
 net/core/filter.c     | 24 +++++++++++++++---------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 799b245..b914fbe 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4205,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		}
 
 		if (insn->imm == BPF_FUNC_redirect_map) {
-			u64 addr = (unsigned long)prog;
+			/* Note, we cannot use prog directly as imm as subsequent
+			 * rewrites would still change the prog pointer. The only
+			 * stable address we can use is aux, which also works with
+			 * prog clones during blinding.
+			 */
+			u64 addr = (unsigned long)prog->aux;
 			struct bpf_insn r4_ld[] = {
 				BPF_LD_IMM64(BPF_REG_4, addr),
 				*insn,
diff --git a/net/core/filter.c b/net/core/filter.c
index 24dd33d..82edad5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1794,7 +1794,7 @@ struct redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
-	const struct bpf_prog *map_owner;
+	unsigned long   map_owner;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -2500,11 +2500,17 @@ void xdp_do_flush_map(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
+static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
+				   unsigned long aux)
+{
+	return (unsigned long)xdp_prog->aux != aux;
+}
+
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
-	const struct bpf_prog *map_owner = ri->map_owner;
+	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
@@ -2512,9 +2518,9 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
-	if (unlikely(map_owner != xdp_prog)) {
+	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 		err = -EFAULT;
 		map = NULL;
 		goto err;
@@ -2574,7 +2580,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
-	const struct bpf_prog *map_owner = ri->map_owner;
+	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
@@ -2583,10 +2589,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
 	if (map) {
-		if (unlikely(map_owner != xdp_prog)) {
+		if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 			err = -EFAULT;
 			map = NULL;
 			goto err;
@@ -2632,7 +2638,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 	ri->ifindex = ifindex;
 	ri->flags = flags;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
 	return XDP_REDIRECT;
 }
@@ -2646,7 +2652,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 };
 
 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
-	   const struct bpf_prog *, map_owner)
+	   unsigned long, map_owner)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH net] bpf: do not disable/enable BH in bpf_map_free_id()
From: David Miller @ 2017-09-19 22:43 UTC (permalink / raw)
  To: eric.dumazet; +Cc: kafai, ast, netdev
In-Reply-To: <1505837759.29839.64.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 19 Sep 2017 09:15:59 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> syzkaller reported following splat [1]
> 
> Since hard irq are disabled by the caller, bpf_map_free_id()
> should not try to enable/disable BH.
> 
> Another solution would be to change htab_map_delete_elem() to
> defer the free_htab_elem() call after
> raw_spin_unlock_irqrestore(&b->lock, flags), but this might be not
> enough to cover other code paths.
> 
> [1]
 ...
> Fixes: f3f1c054c288 ("bpf: Introduce bpf_map ID")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Martin KaFai Lau <kafai@fb.com>

Applied and queued up for -stable, thanks Eric.

^ permalink raw reply

* [PATCH] net: ethernet: aquantia: default to no in config
From: Vito Caputo @ 2017-09-19 22:43 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev

NET_VENDOR_AQUANTIA was "default y" for some reason, which seems
obviously inappropriate.
---
 drivers/net/ethernet/aquantia/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig
index cdf78e069a39..6167b13cf349 100644
--- a/drivers/net/ethernet/aquantia/Kconfig
+++ b/drivers/net/ethernet/aquantia/Kconfig
@@ -4,7 +4,7 @@
 
 config NET_VENDOR_AQUANTIA
 	bool "aQuantia devices"
-	default y
+	default n
 	---help---
 	  Set this to y if you have an Ethernet network cards that uses the aQuantia
 	  AQC107/AQC108 chipset.
-- 
2.11.0

^ permalink raw reply related

* Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-19 22:35 UTC (permalink / raw)
  To: Linux Kernel Network Developers
In-Reply-To: <4745525f-18e4-7f69-fe21-8e507e407b33@itcare.pl>

Just tried latest net-next git and found kernel panic.

Below link to bugzilla.

https://bugzilla.kernel.org/attachment.cgi?id=258499

^ permalink raw reply

* Re: [PATCH] ipv6_skip_exthdr: use ipv6_authlen for AH hdrlen
From: David Miller @ 2017-09-19 22:32 UTC (permalink / raw)
  To: qasdfgtyuiop; +Cc: trivial, netdev, kuznet, yoshfuji
In-Reply-To: <20170919125950.11537-1-qasdfgtyuiop@gmail.com>

From: Xiang Gao <qasdfgtyuiop@gmail.com>
Date: Tue, 19 Sep 2017 08:59:50 -0400

> In ipv6_skip_exthdr, the length of AH header is computed manually
> as (hp->hdrlen+2)<<2. However, in include/linux/ipv6.h, a macro
> named ipv6_authlen is already defined for exactly the same job. This
> commit replaces the manual computation code with the macro.

All patch submissions must have a proper signoff.

Also, please use a proper subsystem prefix in your Subject
line "[PATCH] ipv6: Use ipv6_authlen for AH hdrlen in ipv6_skip_exthdr()"
would have been much better as "ipv6: " is the appropriate
subsystem prefix to use here.

Thanks.

^ permalink raw reply

* Re: [PATCH net-next] selftests: rtnetlink.sh: add test case for device ifalias
From: David Miller @ 2017-09-19 22:30 UTC (permalink / raw)
  To: fw; +Cc: netdev
In-Reply-To: <20170919124217.25105-1-fw@strlen.de>

From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Sep 2017 14:42:17 +0200

> Signed-off-by: Florian Westphal <fw@strlen.de>

Applied, thanks Florian.

^ permalink raw reply

* Re: [PATCH net] tcp: fastopen: fix on syn-data transmit failure
From: Yuchung Cheng @ 2017-09-19 22:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Neal Cardwell, netdev
In-Reply-To: <1505840757.29839.77.camel@edumazet-glaptop3.roam.corp.google.com>

On Tue, Sep 19, 2017 at 10:05 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> Our recent change exposed a bug in TCP Fastopen Client that syzkaller
> found right away [1]
>
> When we prepare skb with SYN+DATA, we attempt to transmit it,
> and we update socket state as if the transmit was a success.
>
> In socket RTX queue we have two skbs, one with the SYN alone,
> and a second one containing the DATA.
>
> When (malicious) ACK comes in, we now complain that second one had no
> skb_mstamp.
>
> The proper fix is to make sure that if the transmit failed, we do not
> pretend we sent the DATA skb, and make it our send_head.
>
> When 3WHS completes, we can now send the DATA right away, without having
> to wait for a timeout.
>
> [1]
> WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117()
>
>  WARN_ON_ONCE(last_ackt == 0);
>
> Modules linked in:
> CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
>  0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000
>  ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f
>  ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0
> Call Trace:
>  [<ffffffff81cad00d>] __dump_stack
>  [<ffffffff81cad00d>] dump_stack+0xc1/0x124
>  [<ffffffff812c33c2>] warn_slowpath_common+0xe2/0x150
>  [<ffffffff812c361e>] warn_slowpath_null+0x2e/0x40
>  [<ffffffff828a4347>] tcp_clean_rtx_queue+0x2057/0x2ab0 n
>  [<ffffffff828ae6fd>] tcp_ack+0x151d/0x3930
>  [<ffffffff828baa09>] tcp_rcv_state_process+0x1c69/0x4fd0
>  [<ffffffff828efb7f>] tcp_v4_do_rcv+0x54f/0x7c0
>  [<ffffffff8258aacb>] sk_backlog_rcv
>  [<ffffffff8258aacb>] __release_sock+0x12b/0x3a0
>  [<ffffffff8258ad9e>] release_sock+0x5e/0x1c0
>  [<ffffffff8294a785>] inet_wait_for_connect
>  [<ffffffff8294a785>] __inet_stream_connect+0x545/0xc50
>  [<ffffffff82886f08>] tcp_sendmsg_fastopen
>  [<ffffffff82886f08>] tcp_sendmsg+0x2298/0x35a0
>  [<ffffffff82952515>] inet_sendmsg+0xe5/0x520
>  [<ffffffff8257152f>] sock_sendmsg_nosec
>  [<ffffffff8257152f>] sock_sendmsg+0xcf/0x110
>
> Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
> Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>

Thanks Eric for fixing this. The current arrangement of SYN plus data
packet seems to cause more code for error cases. I am wondering if a
(subsequent) refactoring patch can make it simpler by updating the
states after a successful transmission (instead of update and revert).

> ---
>  net/ipv4/tcp_output.c |    9 +++++++++
>  1 file changed, 9 insertions(+)
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 517d737059d18d8821b65dcdf54d9bb3448784c2..0bc9e46a53696578eb6e911f2f75e6b34c80894f 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -3389,6 +3389,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
>                 goto done;
>         }
>
> +       /* data was not sent, this is our new send_head */
> +       sk->sk_send_head = syn_data;
> +       tp->packets_out -= tcp_skb_pcount(syn_data);
> +
>  fallback:
>         /* Send a regular SYN with Fast Open cookie request option */
>         if (fo->cookie.len > 0)
> @@ -3441,6 +3445,11 @@ int tcp_connect(struct sock *sk)
>          */
>         tp->snd_nxt = tp->write_seq;
>         tp->pushed_seq = tp->write_seq;
> +       buff = tcp_send_head(sk);
> +       if (unlikely(buff)) {
> +               tp->snd_nxt     = TCP_SKB_CB(buff)->seq;
> +               tp->pushed_seq  = TCP_SKB_CB(buff)->seq;
> +       }
>         TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
>
>         /* Timer for repeating the SYN until an answer. */
>
>

^ permalink raw reply

* Re: [PATCH v2 net-next] net: sk_buff rbnode reorg
From: David Miller @ 2017-09-19 22:20 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, soheil, weiwan, willemb
In-Reply-To: <1505823264.29839.54.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 19 Sep 2017 05:14:24 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> skb->rbnode shares space with skb->next, skb->prev and skb->tstamp
> 
> Current uses (TCP receive ofo queue and netem) need to save/restore
> tstamp, while skb->dev is either NULL (TCP) or a constant for a given
> queue (netem).
>     
> Since we plan using an RB tree for TCP retransmit queue to speedup SACK
> processing with large BDP, this patch exchanges skb->dev and
> skb->tstamp.
>     
> This saves some overhead in both TCP and netem.
> 
> v2: removes the swtstamp field from struct tcp_skb_cb
>     
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Looks great, applied, thanks Eric.

^ permalink raw reply

* Re: [PATCH] rhashtable: Documentation tweak
From: David Miller @ 2017-09-19 22:19 UTC (permalink / raw)
  To: agruenba; +Cc: tgraf, herbert, netdev
In-Reply-To: <20170919104137.22916-1-agruenba@redhat.com>

From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Sep 2017 12:41:37 +0200

> Clarify that rhashtable_walk_{stop,start} will not reset the iterator to
> the beginning of the hash table.  Confusion between rhashtable_walk_enter
> and rhashtable_walk_start has already led to a bug.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

Applied, thanks.

^ permalink raw reply

* Reporting transceiver with ethtool_link_ksettings
From: Florian Fainelli @ 2017-09-19 22:03 UTC (permalink / raw)
  To: netdev, davem, linville, decot

Hi,

After tracking down why all network interfaces using PHYLIB and using
phy_ethtool_link_ksettings_get would report "Transceiver: internal" it
became clear that's because ethtool_link_ksettings deprecated that field...

We could have deprecated setting the transceiver which makes sense, but
not deprecating getting the transceiver type which is useful information.

So what are the options here? Would this be acceptable:

diff --git a/ethtool-copy.h b/ethtool-copy.h
index 06fc04c73079..bb9b55806bf4 100644
--- a/ethtool-copy.h
+++ b/ethtool-copy.h
@@ -1752,7 +1752,9 @@ struct ethtool_link_settings {
        __u8    eth_tp_mdix;
        __u8    eth_tp_mdix_ctrl;
        __s8    link_mode_masks_nwords;
-       __u32   reserved[8];
+       __u8    transceiver;
+       __u8    reserved1[3];
+       __u32   reserved[7];
        __u32   link_mode_masks[0];
        /* layout of link_mode_masks fields:
         * __u32 map_supported[link_mode_masks_nwords];

^ permalink raw reply related

* Re: [PATCH] tcp: avoid bogus warning in tcp_clean_rtx_queue
From: David Miller @ 2017-09-19 22:01 UTC (permalink / raw)
  To: arnd
  Cc: kuznet, yoshfuji, edumazet, ncardwell, ycheng, soheil, fw, netdev,
	linux-kernel
In-Reply-To: <CAK8P3a3G78pqFsoimeNrKnPLOUYR+X1qzhbKyabn_tN4oZMLyQ@mail.gmail.com>

From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Sep 2017 23:32:33 +0200

> On Tue, Sep 19, 2017 at 11:02 PM, David Miller <davem@davemloft.net> wrote:
>> What cpu did you test the object code generation upon and does that
>> cpu have branch prediction hints in the target you are building for?
> 
> This was a randconfig build targeting ARMv5. I'm pretty sure that has
> no such hint instructions.

I just tested on sparc64 and it changed the branch prediction:

 .L2157:
-       brz,pn  %i3, .L1898     ! first_ackt,
+       brz,pt  %i2, .L1898     ! first_ackt,
         mov    -1, %o2 !, seq_rtt_us

^ permalink raw reply

* Reply
From: a @ 2017-09-19 21:34 UTC (permalink / raw)
  To: Recipients

Are you free for discussion?

^ permalink raw reply

* Re: [PATCH] tcp: avoid bogus warning in tcp_clean_rtx_queue
From: Arnd Bergmann @ 2017-09-19 21:32 UTC (permalink / raw)
  To: David Miller
  Cc: Alexey Kuznetsov, yoshfuji, Eric Dumazet, Neal Cardwell, ycheng,
	soheil, Florian Westphal, Networking, Linux Kernel Mailing List
In-Reply-To: <20170919.140250.1393293793761020505.davem@davemloft.net>

On Tue, Sep 19, 2017 at 11:02 PM, David Miller <davem@davemloft.net> wrote:
> From: Arnd Bergmann <arnd@arndb.de>
> Date: Mon, 18 Sep 2017 22:48:47 +0200
>
>> gcc-4.9 warns that it cannot trace the state of the 'last_ackt'
>> variable since the change to the TCP timestamping code, when
>> CONFIG_PROFILE_ANNOTATED_BRANCHES is set:
>>
>> net/ipv4/tcp_input.c: In function 'tcp_clean_rtx_queue':
>> include/net/tcp.h:757:23: error: 'last_ackt' may be used uninitialized in this function [-Werror=maybe-uninitialized]
>>
>> Other gcc versions, both older and newer do not show this
>> warning. Removing the 'likely' annotation makes it go away,
>> and has no effect on the object code without
>> CONFIG_PROFILE_ANNOTATED_BRANCHES, as tested with gcc-4.9
>> and gcc-7.1.1, so this seems to be a safe workaround.
>>
>> Fixes: 9a568de4818d ("tcp: switch TCP TS option (RFC 7323) to 1ms clock")
>> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
>
> This reaches the limits at which I am willing to work around compiler
> stuff.

I see. It is definitely a really obscure case, so if there is any doubt
that the workaround is harmless, then we shouldn't take it. The warning
only shows up on gcc-4.9 but not anything newer, and we disable
-Wmaybe-uninitialized on all older versions because of the false
positives.

It's also possible that it needed a combination of multiple other options,
not just CONFIG_PROFILE_ANNOTATED_BRANCHES. I build-tested
with gcc-4.9 to see if anything would show up that we don't also get a
warning for in gcc-7, and this came up once in several hundred randconfig
builds across multiple architectures (no other new warnings appeared
with gcc-4.9).

> What cpu did you test the object code generation upon and does that
> cpu have branch prediction hints in the target you are building for?

This was a randconfig build targeting ARMv5. I'm pretty sure that has
no such hint instructions.

       Arnd

^ permalink raw reply

* Re: [PATCH 3/3][v2] selftests: silence test output by default
From: Shuah Khan @ 2017-09-19 21:31 UTC (permalink / raw)
  To: josef, davem, netdev, linux-kselftest; +Cc: Josef Bacik, Shuah Khan
In-Reply-To: <1505829088-1823-3-git-send-email-jbacik@fb.com>

On 09/19/2017 07:51 AM, josef@toxicpanda.com wrote:
> From: Josef Bacik <jbacik@fb.com>
> 
> Some of the networking tests are very noisy and make it impossible to
> see if we actually passed the tests as they run.  Default to suppressing
> the output from any tests run in order to make it easier to track what
> failed.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
> v1->v2:
> - dump output into /tmp/testname instead of /dev/null
> 

Thanks for the fix. Applied to linux-kselftest for 4.14-rc2

-- Shuah

^ permalink raw reply

* Re: [PATCH net-next 0/3] Implement delete for BPF LPM trie
From: Daniel Mack @ 2017-09-19 21:31 UTC (permalink / raw)
  To: David Miller, kraigatgoog; +Cc: ast, daniel, netdev
In-Reply-To: <20170919.142935.72220924483937276.davem@davemloft.net>

On 09/19/2017 11:29 PM, David Miller wrote:
> From: Craig Gallek <kraigatgoog@gmail.com>
> Date: Tue, 19 Sep 2017 17:16:13 -0400
> 
>> On Tue, Sep 19, 2017 at 5:13 PM, Daniel Mack <daniel@zonque.org> wrote:
>>> On 09/19/2017 10:55 PM, David Miller wrote:
>>>> From: Craig Gallek <kraigatgoog@gmail.com>
>>>> Date: Mon, 18 Sep 2017 15:30:54 -0400
>>>>
>>>>> This was previously left as a TODO.  Add the implementation and
>>>>> extend the test to cover it.
>>>>
>>>> Series applied, thanks.
>>>>
>>>
>>> Hmm, I think these patches need some more discussion regarding the IM
>>> nodes handling, see the reply I sent an hour ago. Could you wait for
>>> that before pushing your tree?
>>
>> I can follow up with a patch to implement your suggestion.  It's
>> really just an efficiency improvement, though, so I think it's ok to
>> handle independently. (Sorry, I haven't had a chance to play with the
>> implementation details yet).
> 
> Sorry, I thought the core implementation had been agreed upon and the
> series was OK.  All that was asked for were simplifications and/or
> optimization which could be done via follow-up patches.
> 
> It's already pushed out to my tree, so I would need to do a real
> revert.
> 
> I hope that won't be necessary.
> 

Nah, it's okay I guess. I trust Craig to send follow-up patches. After
all, efficiency is what this whole exercise is all about, so I think it
should be done correctly :)



Thanks,
Daniel

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox