Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v4 net-next 02/11] ibmvnic: Replace is_closed with state field
From: Nathan Fontenot @ 2017-05-03 18:04 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170503180246.29742.64039.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

Replace the is_closed flag in the ibmvnic adapter strcut with a
more comprehensive state field that tracks the current state of
the driver.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   20 +++++++++++++-------
 drivers/net/ethernet/ibm/ibmvnic.h |   12 ++++++++++--
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c67f1d6..40a8ba0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -669,7 +669,9 @@ static int ibmvnic_open(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int i, rc;
 
-	if (adapter->is_closed) {
+	adapter->state = VNIC_OPENING;
+
+	if (adapter->state == VNIC_CLOSED) {
 		rc = ibmvnic_init(adapter);
 		if (rc)
 			return rc;
@@ -704,7 +706,7 @@ static int ibmvnic_open(struct net_device *netdev)
 		release_resources(adapter);
 	} else {
 		netif_tx_start_all_queues(netdev);
-		adapter->is_closed = false;
+		adapter->state = VNIC_OPEN;
 	}
 
 	return rc;
@@ -733,7 +735,7 @@ static int ibmvnic_close(struct net_device *netdev)
 	int rc = 0;
 	int i;
 
-	adapter->closing = true;
+	adapter->state = VNIC_CLOSING;
 	disable_sub_crqs(adapter);
 
 	if (adapter->napi) {
@@ -748,8 +750,7 @@ static int ibmvnic_close(struct net_device *netdev)
 
 	release_resources(adapter);
 
-	adapter->is_closed = true;
-	adapter->closing = false;
+	adapter->state = VNIC_CLOSED;
 	return rc;
 }
 
@@ -1860,7 +1861,8 @@ static int pending_scrq(struct ibmvnic_adapter *adapter,
 {
 	union sub_crq *entry = &scrq->msgs[scrq->cur];
 
-	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP || adapter->closing)
+	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP ||
+	    adapter->state == VNIC_CLOSING)
 		return 1;
 	else
 		return 0;
@@ -3353,6 +3355,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		return -ENOMEM;
 
 	adapter = netdev_priv(netdev);
+	adapter->state = VNIC_PROBING;
 	dev_set_drvdata(&dev->dev, netdev);
 	adapter->vdev = dev;
 	adapter->netdev = netdev;
@@ -3380,7 +3383,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 
 	netdev->mtu = adapter->req_mtu - ETH_HLEN;
-	adapter->is_closed = false;
 
 	rc = register_netdev(netdev);
 	if (rc) {
@@ -3390,6 +3392,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 	dev_info(&dev->dev, "ibmvnic registered\n");
 
+	adapter->state = VNIC_PROBED;
 	return 0;
 }
 
@@ -3398,12 +3401,15 @@ static int ibmvnic_remove(struct vio_dev *dev)
 	struct net_device *netdev = dev_get_drvdata(&dev->dev);
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+	adapter->state = VNIC_REMOVING;
 	unregister_netdev(netdev);
 
 	release_resources(adapter);
 	release_sub_crqs(adapter);
 	release_crq_queue(adapter);
 
+	adapter->state = VNIC_REMOVED;
+
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index a69979f..03a866f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -913,6 +913,15 @@ struct ibmvnic_error_buff {
 	__be32 error_id;
 };
 
+enum vnic_state {VNIC_PROBING = 1,
+		 VNIC_PROBED,
+		 VNIC_OPENING,
+		 VNIC_OPEN,
+		 VNIC_CLOSING,
+		 VNIC_CLOSED,
+		 VNIC_REMOVING,
+		 VNIC_REMOVED};
+
 struct ibmvnic_adapter {
 	struct vio_dev *vdev;
 	struct net_device *netdev;
@@ -962,7 +971,6 @@ struct ibmvnic_adapter {
 	u64 promisc;
 
 	struct ibmvnic_tx_pool *tx_pool;
-	bool closing;
 	struct completion init_done;
 	int init_done_rc;
 
@@ -1011,5 +1019,5 @@ struct ibmvnic_adapter {
 	struct work_struct ibmvnic_xport;
 	struct tasklet_struct tasklet;
 	bool failover;
-	bool is_closed;
+	enum vnic_state state;
 };

^ permalink raw reply related

* [PATCH v4 net-next 01/11] ibmvnic: Move resource initialization to its own routine
From: Nathan Fontenot @ 2017-05-03 18:04 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170503180246.29742.64039.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

Move all of the calls to initialize resources for the driver to
a separate routine.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   71 ++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4fcd2f0..c67f1d6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -624,22 +624,10 @@ static int set_real_num_queues(struct net_device *netdev)
 	return rc;
 }
 
-static int ibmvnic_open(struct net_device *netdev)
+static int init_resources(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	struct device *dev = &adapter->vdev->dev;
-	int rc = 0;
-	int i;
-
-	if (adapter->is_closed) {
-		rc = ibmvnic_init(adapter);
-		if (rc)
-			return rc;
-	}
-
-	rc = ibmvnic_login(netdev);
-	if (rc)
-		return rc;
+	struct net_device *netdev = adapter->netdev;
+	int i, rc;
 
 	rc = set_real_num_queues(netdev);
 	if (rc)
@@ -647,7 +635,7 @@ static int ibmvnic_open(struct net_device *netdev)
 
 	rc = init_sub_crq_irqs(adapter);
 	if (rc) {
-		dev_err(dev, "failed to initialize sub crq irqs\n");
+		netdev_err(netdev, "failed to initialize sub crq irqs\n");
 		return -1;
 	}
 
@@ -659,25 +647,47 @@ static int ibmvnic_open(struct net_device *netdev)
 	adapter->napi = kcalloc(adapter->req_rx_queues,
 				sizeof(struct napi_struct), GFP_KERNEL);
 	if (!adapter->napi)
-		goto ibmvnic_open_fail;
+		return -ENOMEM;
+
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
 			       NAPI_POLL_WEIGHT);
-		napi_enable(&adapter->napi[i]);
 	}
 
 	send_map_query(adapter);
 
 	rc = init_rx_pools(netdev);
 	if (rc)
-		goto ibmvnic_open_fail;
+		return rc;
 
 	rc = init_tx_pools(netdev);
+	return rc;
+}
+
+static int ibmvnic_open(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int i, rc;
+
+	if (adapter->is_closed) {
+		rc = ibmvnic_init(adapter);
+		if (rc)
+			return rc;
+	}
+
+	rc = ibmvnic_login(netdev);
 	if (rc)
-		goto ibmvnic_open_fail;
+		return rc;
+
+	rc = init_resources(adapter);
+	if (rc)
+		return rc;
 
 	replenish_pools(adapter);
 
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_enable(&adapter->napi[i]);
+
 	/* We're ready to receive frames, enable the sub-crq interrupts and
 	 * set the logical link state to up
 	 */
@@ -688,19 +698,16 @@ static int ibmvnic_open(struct net_device *netdev)
 		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
 
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
-	if (rc)
-		goto ibmvnic_open_fail;
-
-	netif_tx_start_all_queues(netdev);
-	adapter->is_closed = false;
-
-	return 0;
+	if (rc) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			napi_disable(&adapter->napi[i]);
+		release_resources(adapter);
+	} else {
+		netif_tx_start_all_queues(netdev);
+		adapter->is_closed = false;
+	}
 
-ibmvnic_open_fail:
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		napi_disable(&adapter->napi[i]);
-	release_resources(adapter);
-	return -ENOMEM;
+	return rc;
 }
 
 static void disable_sub_crqs(struct ibmvnic_adapter *adapter)

^ permalink raw reply related

* [PATCH v4 net-next 00/11] ibmvnic: Updated reset handler and code fixes
From: Nathan Fontenot @ 2017-05-03 18:04 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon

This set of patches multiple code fixes and a new rest handler
for the ibmvnic driver. In order to implement the new reset handler
for the ibmvnic driver resource initialization needed to be moved to
its own routine, a state variable is introduced to replace the
various is_* flags in the driver, and a new routine to handle the
assorted reasons the driver can be reset.

v4 updates:

Patch 3/11: Corrected trailing whitespace
Patch 7/11: Corrected trailing whitespace

v3 updates:

Patch 10/11: Correct patch subject line to be a description of the patch.

v2 updates:

Patch 11/11: Use __netif_subqueue_stopped() instead of
netif_subqueue_stopped() to avoid possible use of an un-initialized
skb variable.

---

Nathan Fontenot (10):
      ibmvnic: Move resource initialization to its own routine
      ibmvnic: Replace is_closed with state field
      ibmvnic: Updated reset handling
      ibmvnic: Delete napi's when releasing driver resources
      ibmvnic: Whitespace correction in release_rx_pools
      ibmvnic: Clean up tx pools when closing
      ibmvnic: Wait for any pending scrqs entries at driver close
      ibmvnic: Check for driver reset first in ibmvnic_xmit
      ibmvnic: Continue skb processing after skb completion error
      ibmvnic: Move queue restarting in ibmvnic_tx_complete

Thomas Falcon (1):
      ibmvnic: Record SKB RX queue during poll

 drivers/net/ethernet/ibm/ibmvnic.c |  561 +++++++++++++++++++++++-------------
 drivers/net/ethernet/ibm/ibmvnic.h |   31 ++
 2 files changed, 388 insertions(+), 204 deletions(-)

^ permalink raw reply

* Re: [PATCH] net/sched: remove redundant null check on head
From: Jiri Pirko @ 2017-05-03 14:15 UTC (permalink / raw)
  To: Colin King
  Cc: Jamal Hadi Salim, Cong Wang, netdev, kernel-janitors,
	David S . Miller, linux-kernel
In-Reply-To: <20170503135040.32023-1-colin.king@canonical.com>

Wed, May 03, 2017 at 03:50:40PM CEST, colin.king@canonical.com wrote:
>From: Colin Ian King <colin.king@canonical.com>
>
>head is previously null checked and so the 2nd null check on head
>is redundant and therefore can be removed.
>
>Detected by CoverityScan, CID#1399505 ("Logically dead code")
>
>Signed-off-by: Colin Ian King <colin.king@canonical.com>

Acked-by: Jiri Pirko <jiri@mellanox.com>

^ permalink raw reply

* Re: [PATCH 00/16] Netfilter/IPVS/OVS fixes for net
From: David Miller @ 2017-05-03 14:11 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1493803931-2837-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed,  3 May 2017 11:31:55 +0200

> The following patchset contains a rather large batch of Netfilter, IPVS
> and OVS fixes for your net tree. This includes fixes for ctnetlink, the
> userspace conntrack helper infrastructure, conntrack OVS support,
> ebtables DNAT target, several leaks in error path among other. More
> specifically, they are:
 ...
> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Pulled, thanks Pablo.

^ permalink raw reply

* Re: [PATCH] netfilter: conntrack: Force inlining of build check to prevent build failure
From: Pablo Neira Ayuso @ 2017-05-03 14:10 UTC (permalink / raw)
  To: David Miller
  Cc: geert, fw, kadlec, arnd, netfilter-devel, coreteam, netdev,
	linux-kernel
In-Reply-To: <20170503.095516.84626203716316173.davem@davemloft.net>

On Wed, May 03, 2017 at 09:55:16AM -0400, David Miller wrote:
> From: Geert Uytterhoeven <geert@linux-m68k.org>
> Date: Wed,  3 May 2017 14:18:43 +0200
> 
> > If gcc (e.g. 4.1.2) decides not to inline total_extension_size(), the
> > build will fail with:
> > 
> >     net/built-in.o: In function `nf_conntrack_init_start':
> >     (.text+0x9baf6): undefined reference to `__compiletime_assert_1893'
> > 
> > or
> > 
> >     ERROR: "__compiletime_assert_1893" [net/netfilter/nf_conntrack.ko] undefined!
> > 
> > Fix this by forcing inlining of total_extension_size().
> > 
> > Fixes: b3a5db109e0670d6 ("netfilter: conntrack: use u8 for extension sizes again")
> > Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
> 
> Pablo, I'm going to apply this directly to my tree to fix this build
> failure, I hope you don't mind.

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>

^ permalink raw reply

* Re: [PATCH] netfilter: conntrack: Force inlining of build check to prevent build failure
From: David Miller @ 2017-05-03 13:55 UTC (permalink / raw)
  To: geert
  Cc: fw, pablo, kadlec, arnd, netfilter-devel, coreteam, netdev,
	linux-kernel
In-Reply-To: <1493813923-5441-1-git-send-email-geert@linux-m68k.org>

From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed,  3 May 2017 14:18:43 +0200

> If gcc (e.g. 4.1.2) decides not to inline total_extension_size(), the
> build will fail with:
> 
>     net/built-in.o: In function `nf_conntrack_init_start':
>     (.text+0x9baf6): undefined reference to `__compiletime_assert_1893'
> 
> or
> 
>     ERROR: "__compiletime_assert_1893" [net/netfilter/nf_conntrack.ko] undefined!
> 
> Fix this by forcing inlining of total_extension_size().
> 
> Fixes: b3a5db109e0670d6 ("netfilter: conntrack: use u8 for extension sizes again")
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>

Pablo, I'm going to apply this directly to my tree to fix this build
failure, I hope you don't mind.

Thanks.

^ permalink raw reply

* Re: [PATCH] test_bpf: Use ULL suffix for 64-bit constants
From: David Miller @ 2017-05-03 13:54 UTC (permalink / raw)
  To: geert; +Cc: ast, daniel, netdev, linux-kernel
In-Reply-To: <1493811064-24542-1-git-send-email-geert@linux-m68k.org>

From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed,  3 May 2017 13:31:04 +0200

> On 32-bit:
> 
>     lib/test_bpf.c:4772: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4772: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4773: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4773: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4787: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4787: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4801: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4801: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4802: warning: integer constant is too large for ‘unsigned long’ type
>     lib/test_bpf.c:4802: warning: integer constant is too large for ‘unsigned long’ type
> 
> On 32-bit systems, "long" is only 32-bit.
> Replace the "UL" suffix by "ULL" to fix this.
> 
> Fixes: 85f68fe898320575 ("bpf, arm64: implement jiting of BPF_XADD")
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>

Applied.

^ permalink raw reply

* Re: [PATCH 1/1] net: usb: qmi_wwan: add Telit ME910 support
From: David Miller @ 2017-05-03 13:54 UTC (permalink / raw)
  To: dnlplm; +Cc: bjorn, netdev
In-Reply-To: <1493800211-7758-1-git-send-email-dnlplm@gmail.com>

From: Daniele Palmas <dnlplm@gmail.com>
Date: Wed,  3 May 2017 10:30:11 +0200

> This patch adds support for Telit ME910 PID 0x1100.
> 
> Signed-off-by: Daniele Palmas <dnlplm@gmail.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH net] tg3: don't clear stats while tg3_close
From: David Miller @ 2017-05-03 13:54 UTC (permalink / raw)
  To: yuehaibing; +Cc: netdev, weiyongjun1
In-Reply-To: <20170503075132.2648-1-yuehaibing@huawei.com>

From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 3 May 2017 15:51:32 +0800

> Now tg3 NIC's stats will be cleared after ifdown/ifup. bond_get_stats traverse
> its salves to get statistics,cumulative the increment.If a tg3 NIC is added to
> bonding as a slave,ifdown/ifup will cause bonding's stats become tremendous value
> (ex.1638.3 PiB) because of negative increment.
> 
> Fixes: 92feeabf3f67 ("tg3: Save stats across chip resets")
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>

Indeed, no driver should clear statistics over open/close.

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next] selftests/bpf: get rid of -D__x86_64__
From: David Miller @ 2017-05-03 13:52 UTC (permalink / raw)
  To: ast; +Cc: daniel, netdev
In-Reply-To: <20170503041443.3125777-1-ast@fb.com>

From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 2 May 2017 21:14:43 -0700

> -D__x86_64__ workaround was used to make /usr/include/features.h
> to follow expected path through the system include headers.
> This is not portable.
> Instead define dummy stubs.h which is used by 'clang -target bpf'
> 
> Fixes: 6882804c916b ("selftests/bpf: add a test for overlapping packet range checks")
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Applied, thanks for getting rid of this wart.

^ permalink raw reply

* Re: [PATCH net-next] selftests/bpf: add a test case to check verifier pointer arithmetic
From: David Miller @ 2017-05-03 13:52 UTC (permalink / raw)
  To: ast; +Cc: daniel, netdev, kernel-team
In-Reply-To: <20170503025814.2787337-1-ast@fb.com>

From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 2 May 2017 19:58:14 -0700

> From: Yonghong Song <yhs@fb.com>
> 
> With clang/llvm 4.0+, the test case is able to generate
> the following pattern:
> ....
> 440: (b7) r1 = 15
> 441: (05) goto pc+73
> 515: (79) r6 = *(u64 *)(r10 -152)
> 516: (bf) r7 = r10
> 517: (07) r7 += -112
> 518: (bf) r2 = r7
> 519: (0f) r2 += r1
> 520: (71) r1 = *(u8 *)(r8 +0)
> 521: (73) *(u8 *)(r2 +45) = r1
> ....
> 
> commit 332270fdc8b6 ("bpf: enhance verifier to understand stack
> pointer arithmetic") improved verifier to handle such a pattern.
> This patch adds a C test case to actually generate such a pattern.
> A dummy tracepoint interface is used to load the program
> into the kernel.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> Acked-by: Martin KaFai Lau <kafai@fb.com>
> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Applied.

^ permalink raw reply

* Re: [PATCH] xdp: use common helper for netlink extended ack reporting
From: David Miller @ 2017-05-03 13:51 UTC (permalink / raw)
  To: daniel; +Cc: jakub.kicinski, alexei.starovoitov, netdev
In-Reply-To: <9dc20d2c3cca095b42e730655c8fd9f5d59a4568.1493763990.git.daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed,  3 May 2017 00:39:17 +0200

> Small follow-up to d74a32acd59a ("xdp: use netlink extended ACK reporting")
> in order to let drivers all use the same NL_SET_ERR_MSG_MOD() helper macro
> for reporting. This also ensures that we consistently add the driver's
> prefix for dumping the report in user space to indicate that the error
> message is driver specific and not coming from core code. Furthermore,
> NL_SET_ERR_MSG_MOD() now reuses NL_SET_ERR_MSG() and thus makes all macros
> check the pointer as suggested.
> 
> References: https://www.spinics.net/lists/netdev/msg433267.html
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Applied.

^ permalink raw reply

* [PATCH] net/sched: remove redundant null check on head
From: Colin King @ 2017-05-03 13:50 UTC (permalink / raw)
  To: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev
  Cc: kernel-janitors, David S . Miller, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

head is previously null checked and so the 2nd null check on head
is redundant and therefore can be removed.

Detected by CoverityScan, CID#1399505 ("Logically dead code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 net/sched/cls_matchall.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 2efb36c08f2a..dee469fed967 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -203,8 +203,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,

 	*arg = (unsigned long) head;
 	rcu_assign_pointer(tp->root, new);
-	if (head)
-		call_rcu(&head->rcu, mall_destroy_rcu);
+	call_rcu(&head->rcu, mall_destroy_rcu);
 	return 0;

 err_replace_hw_filter:
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH net] net: ipv6: Do not duplicate DAD on link up
From: David Miller @ 2017-05-03 13:46 UTC (permalink / raw)
  To: dsahern; +Cc: netdev, dvyukov, andreyknvl
In-Reply-To: <1493761424-17407-1-git-send-email-dsahern@gmail.com>

From: David Ahern <dsahern@gmail.com>
Date: Tue,  2 May 2017 14:43:44 -0700

> Andrey reported a warning triggered by the rcu code:
 ...
> Andrey's reproducer program runs in a very tight loop, calling
> 'unshare -n' and then spawning 2 sets of 14 threads running random ioctl
> calls. The relevant networking sequence:
> 
> 1. New network namespace created via unshare -n
> - ip6tnl0 device is created in down state
> 
> 2. address added to ip6tnl0
> - equivalent to ip -6 addr add dev ip6tnl0 fd00::bb/1
> - DAD is started on the address and when it completes the host
>   route is inserted into the FIB
> 
> 3. ip6tnl0 is brought up
> - the new fixup_permanent_addr function restarts DAD on the address
> 
> 4. exit namespace
> - teardown / cleanup sequence starts
> - once in a blue moon, lo teardown appears to happen BEFORE teardown
>   of ip6tunl0
>   + down on 'lo' removes the host route from the FIB since the dst->dev
>     for the route is loobback
>   + host route added to rcu callback list
>     * rcu callback has not run yet, so rt is NOT on the gc list so it has
>       NOT been marked obsolete
> 
> 5. in parallel to 4. worker_thread runs addrconf_dad_completed
> - DAD on the address on ip6tnl0 completes
> - calls ipv6_ifa_notify which inserts the host route
> 
> All of that happens very quickly. The result is that a host route that
> has been deleted from the IPv6 FIB and added to the RCU list is re-inserted
> into the FIB.
> 
> The exit namespace eventually gets to cleaning up ip6tnl0 which removes the
> host route from the FIB again, calls the rcu function for cleanup -- and
> triggers the double rcu trace.
> 
> The root cause is duplicate DAD on the address -- steps 2 and 3. Arguably,
> DAD should not be started in step 2. The interface is in the down state,
> so it can not really send out requests for the address which makes starting
> DAD pointless.
> 
> Since the second DAD was introduced by a recent change, seems appropriate
> to use it for the Fixes tag and have the fixup function only start DAD for
> addresses in the PREDAD state which occurs in addrconf_ifdown if the
> address is retained.
> 
> Big thanks to Andrey for isolating a reliable reproducer for this problem.
> Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
> Reported-by: Andrey Konovalov <andreyknvl@google.com>
> Signed-off-by: David Ahern <dsahern@gmail.com>

Applied and queued up for -stable, thanks!

^ permalink raw reply

* Re: [PATCH v6 net-next]smsc911x: Adding support for Micochip LAN9250 Ethernet controller
From: David Miller @ 2017-05-03 13:41 UTC (permalink / raw)
  To: David.Cai; +Cc: netdev, UNGLinuxDriver, steve.glendinning
In-Reply-To: <C3C28FB10418274EB7FD7C2B85C796A44124871C@CHN-SV-EXMX02.mchp-main.com>

From: <David.Cai@microchip.com>
Date: Tue, 2 May 2017 20:59:14 +0000

> From: David Cai <david.cai@microchip.com>
> 
> Adding support for Microchip LAN9250 Ethernet controller.
> 
> Signed-off-by: David Cai <david.cai@microchip.com>

Applied.

Please fix the "From: " field of your outgoing patch emails in the future,
and please do not top-post as you did in your reply to Andrew.

Thanks.

^ permalink raw reply

* [PATCH v2 net] tcp: do not inherit fastopen_req from parent
From: Eric Dumazet @ 2017-05-03 13:39 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Wei Wang, Andrey Konovalov
In-Reply-To: <1493818595.7796.11.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <edumazet@google.com>

Under fuzzer stress, it is possible that a child gets a non NULL
fastopen_req pointer from its parent at accept() time, when/if parent
morphs from listener to active session.

We need to make sure this can not happen, by clearing the field after
socket cloning.

BUG: Double free or freeing an invalid pointer
Unexpected shadow byte: 0xFB
CPU: 3 PID: 20933 Comm: syz-executor3 Not tainted 4.11.0+ #306
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x292/0x395 lib/dump_stack.c:52
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:164
 kasan_report_double_free+0x5c/0x70 mm/kasan/report.c:185
 kasan_slab_free+0x9d/0xc0 mm/kasan/kasan.c:580
 slab_free_hook mm/slub.c:1357 [inline]
 slab_free_freelist_hook mm/slub.c:1379 [inline]
 slab_free mm/slub.c:2961 [inline]
 kfree+0xe8/0x2b0 mm/slub.c:3882
 tcp_free_fastopen_req net/ipv4/tcp.c:1077 [inline]
 tcp_disconnect+0xc15/0x13e0 net/ipv4/tcp.c:2328
 inet_child_forget+0xb8/0x600 net/ipv4/inet_connection_sock.c:898
 inet_csk_reqsk_queue_add+0x1e7/0x250
net/ipv4/inet_connection_sock.c:928
 tcp_get_cookie_sock+0x21a/0x510 net/ipv4/syncookies.c:217
 cookie_v4_check+0x1a19/0x28b0 net/ipv4/syncookies.c:384
 tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1384 [inline]
 tcp_v4_do_rcv+0x731/0x940 net/ipv4/tcp_ipv4.c:1421
 tcp_v4_rcv+0x2dc0/0x31c0 net/ipv4/tcp_ipv4.c:1715
 ip_local_deliver_finish+0x4cc/0xc20 net/ipv4/ip_input.c:216
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip_local_deliver+0x1ce/0x700 net/ipv4/ip_input.c:257
 dst_input include/net/dst.h:492 [inline]
 ip_rcv_finish+0xb1d/0x20b0 net/ipv4/ip_input.c:396
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip_rcv+0xd8c/0x19c0 net/ipv4/ip_input.c:487
 __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4210
 __netif_receive_skb+0x2a/0x1a0 net/core/dev.c:4248
 process_backlog+0xe5/0x6c0 net/core/dev.c:4868
 napi_poll net/core/dev.c:5270 [inline]
 net_rx_action+0xe70/0x18e0 net/core/dev.c:5335
 __do_softirq+0x2fb/0xb99 kernel/softirq.c:284
 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:899
 </IRQ>
 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328
 do_softirq kernel/softirq.c:176 [inline]
 __local_bh_enable_ip+0x1cf/0x1e0 kernel/softirq.c:181
 local_bh_enable include/linux/bottom_half.h:31 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:931 [inline]
 ip_finish_output2+0x9ab/0x15e0 net/ipv4/ip_output.c:230
 ip_finish_output+0xa35/0xdf0 net/ipv4/ip_output.c:316
 NF_HOOK_COND include/linux/netfilter.h:246 [inline]
 ip_output+0x1f6/0x7b0 net/ipv4/ip_output.c:404
 dst_output include/net/dst.h:486 [inline]
 ip_local_out+0x95/0x160 net/ipv4/ip_output.c:124
 ip_queue_xmit+0x9a8/0x1a10 net/ipv4/ip_output.c:503
 tcp_transmit_skb+0x1ade/0x3470 net/ipv4/tcp_output.c:1057
 tcp_write_xmit+0x79e/0x55b0 net/ipv4/tcp_output.c:2265
 __tcp_push_pending_frames+0xfa/0x3a0 net/ipv4/tcp_output.c:2450
 tcp_push+0x4ee/0x780 net/ipv4/tcp.c:683
 tcp_sendmsg+0x128d/0x39b0 net/ipv4/tcp.c:1342
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x446059
RSP: 002b:00007faa6761fb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 0000000000446059
RDX: 0000000000000001 RSI: 0000000020ba3fcd RDI: 0000000000000017
RBP: 00000000006e40a0 R08: 0000000020ba4ff0 R09: 0000000000000010
R10: 0000000020000000 R11: 0000000000000282 R12: 0000000000708150
R13: 0000000000000000 R14: 00007faa676209c0 R15: 00007faa67620700
Object at ffff88003b5bbcb8, in cache kmalloc-64 size: 64
Allocated:
PID = 20909
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:616
 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2745
 kmalloc include/linux/slab.h:490 [inline]
 kzalloc include/linux/slab.h:663 [inline]
 tcp_sendmsg_fastopen net/ipv4/tcp.c:1094 [inline]
 tcp_sendmsg+0x221a/0x39b0 net/ipv4/tcp.c:1139
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xbe
Freed:
PID = 20909
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:589
 slab_free_hook mm/slub.c:1357 [inline]
 slab_free_freelist_hook mm/slub.c:1379 [inline]
 slab_free mm/slub.c:2961 [inline]
 kfree+0xe8/0x2b0 mm/slub.c:3882
 tcp_free_fastopen_req net/ipv4/tcp.c:1077 [inline]
 tcp_disconnect+0xc15/0x13e0 net/ipv4/tcp.c:2328
 __inet_stream_connect+0x20c/0xf90 net/ipv4/af_inet.c:593
 tcp_sendmsg_fastopen net/ipv4/tcp.c:1111 [inline]
 tcp_sendmsg+0x23a8/0x39b0 net/ipv4/tcp.c:1139
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xbe


Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Fixes: 7db92362d2fe ("tcp: fix potential double free issue for fastopen_req")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
---
Note : Probably better to backport this all the way back to 3.17 when
Jerry added FastOpen server code. I am not sure when the bug was really
added.

 net/ipv4/tcp_minisocks.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8f6373b0cd7729e7afde1b733879058197e9c5ca..717be4de53248352c758b50557987d898340dd4f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -523,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
 		tcp_ecn_openreq_child(newtp, req);
+		newtp->fastopen_req = NULL;
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
 		newtp->rack.mstamp.v64 = 0;

^ permalink raw reply related

* [PATCH net] tcp: do not inherit fastopen_req from parent
From: Eric Dumazet @ 2017-05-03 13:36 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Wei Wang, Andrey Konovalov

From: Eric Dumazet <edumazet@google.com>

Under fuzzer stress, it is possible that a child gets a non NULL
fastopen_req pointer from its parent at accept() time, when/if parent
morphs from listener to active session.

We need to make sure this can not happen, by clearing the field after
socket cloning.

==================================================================
BUG: Double free or freeing an invalid pointer
Unexpected shadow byte: 0xFB
CPU: 3 PID: 20933 Comm: syz-executor3 Not tainted 4.11.0+ #306
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x292/0x395 lib/dump_stack.c:52
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:164
 kasan_report_double_free+0x5c/0x70 mm/kasan/report.c:185
 kasan_slab_free+0x9d/0xc0 mm/kasan/kasan.c:580
 slab_free_hook mm/slub.c:1357 [inline]
 slab_free_freelist_hook mm/slub.c:1379 [inline]
 slab_free mm/slub.c:2961 [inline]
 kfree+0xe8/0x2b0 mm/slub.c:3882
 tcp_free_fastopen_req net/ipv4/tcp.c:1077 [inline]
 tcp_disconnect+0xc15/0x13e0 net/ipv4/tcp.c:2328
 inet_child_forget+0xb8/0x600 net/ipv4/inet_connection_sock.c:898
 inet_csk_reqsk_queue_add+0x1e7/0x250
net/ipv4/inet_connection_sock.c:928
 tcp_get_cookie_sock+0x21a/0x510 net/ipv4/syncookies.c:217
 cookie_v4_check+0x1a19/0x28b0 net/ipv4/syncookies.c:384
 tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1384 [inline]
 tcp_v4_do_rcv+0x731/0x940 net/ipv4/tcp_ipv4.c:1421
 tcp_v4_rcv+0x2dc0/0x31c0 net/ipv4/tcp_ipv4.c:1715
 ip_local_deliver_finish+0x4cc/0xc20 net/ipv4/ip_input.c:216
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip_local_deliver+0x1ce/0x700 net/ipv4/ip_input.c:257
 dst_input include/net/dst.h:492 [inline]
 ip_rcv_finish+0xb1d/0x20b0 net/ipv4/ip_input.c:396
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip_rcv+0xd8c/0x19c0 net/ipv4/ip_input.c:487
 __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4210
 __netif_receive_skb+0x2a/0x1a0 net/core/dev.c:4248
 process_backlog+0xe5/0x6c0 net/core/dev.c:4868
 napi_poll net/core/dev.c:5270 [inline]
 net_rx_action+0xe70/0x18e0 net/core/dev.c:5335
 __do_softirq+0x2fb/0xb99 kernel/softirq.c:284
 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:899
 </IRQ>
 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328
 do_softirq kernel/softirq.c:176 [inline]
 __local_bh_enable_ip+0x1cf/0x1e0 kernel/softirq.c:181
 local_bh_enable include/linux/bottom_half.h:31 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:931 [inline]
 ip_finish_output2+0x9ab/0x15e0 net/ipv4/ip_output.c:230
 ip_finish_output+0xa35/0xdf0 net/ipv4/ip_output.c:316
 NF_HOOK_COND include/linux/netfilter.h:246 [inline]
 ip_output+0x1f6/0x7b0 net/ipv4/ip_output.c:404
 dst_output include/net/dst.h:486 [inline]
 ip_local_out+0x95/0x160 net/ipv4/ip_output.c:124
 ip_queue_xmit+0x9a8/0x1a10 net/ipv4/ip_output.c:503
 tcp_transmit_skb+0x1ade/0x3470 net/ipv4/tcp_output.c:1057
 tcp_write_xmit+0x79e/0x55b0 net/ipv4/tcp_output.c:2265
 __tcp_push_pending_frames+0xfa/0x3a0 net/ipv4/tcp_output.c:2450
 tcp_push+0x4ee/0x780 net/ipv4/tcp.c:683
 tcp_sendmsg+0x128d/0x39b0 net/ipv4/tcp.c:1342
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x446059
RSP: 002b:00007faa6761fb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 0000000000446059
RDX: 0000000000000001 RSI: 0000000020ba3fcd RDI: 0000000000000017
RBP: 00000000006e40a0 R08: 0000000020ba4ff0 R09: 0000000000000010
R10: 0000000020000000 R11: 0000000000000282 R12: 0000000000708150
R13: 0000000000000000 R14: 00007faa676209c0 R15: 00007faa67620700
Object at ffff88003b5bbcb8, in cache kmalloc-64 size: 64
Allocated:
PID = 20909
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:616
 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2745
 kmalloc include/linux/slab.h:490 [inline]
 kzalloc include/linux/slab.h:663 [inline]
 tcp_sendmsg_fastopen net/ipv4/tcp.c:1094 [inline]
 tcp_sendmsg+0x221a/0x39b0 net/ipv4/tcp.c:1139
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xbe
Freed:
PID = 20909
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:589
 slab_free_hook mm/slub.c:1357 [inline]
 slab_free_freelist_hook mm/slub.c:1379 [inline]
 slab_free mm/slub.c:2961 [inline]
 kfree+0xe8/0x2b0 mm/slub.c:3882
 tcp_free_fastopen_req net/ipv4/tcp.c:1077 [inline]
 tcp_disconnect+0xc15/0x13e0 net/ipv4/tcp.c:2328
 __inet_stream_connect+0x20c/0xf90 net/ipv4/af_inet.c:593
 tcp_sendmsg_fastopen net/ipv4/tcp.c:1111 [inline]
 tcp_sendmsg+0x23a8/0x39b0 net/ipv4/tcp.c:1139
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xbe
==================================================================


Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Fixes: 7db92362d2fe ("tcp: fix potential double free issue for fastopen_req")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
---
Note : Probably better to backport this all the way back to 3.17 when
Jerry added FastOpen server code. I am not sure when the bug was really
added.

 net/ipv4/tcp_minisocks.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8f6373b0cd7729e7afde1b733879058197e9c5ca..717be4de53248352c758b50557987d898340dd4f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -523,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
 		tcp_ecn_openreq_child(newtp, req);
+		newtp->fastopen_req = NULL;
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
 		newtp->rack.mstamp.v64 = 0;

^ permalink raw reply related

* Re: [net-next PATCH 1/4] samples/bpf: adjust rlimit RLIMIT_MEMLOCK for traceex2, tracex3 and tracex4
From: David Miller @ 2017-05-03 13:31 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: brouer, kafai, netdev, eric, borkmann
In-Reply-To: <20170503005314.7oovr764r3e4elzd@ast-mbp>

From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Date: Tue, 2 May 2017 17:53:16 -0700

> On Tue, May 02, 2017 at 02:31:50PM +0200, Jesper Dangaard Brouer wrote:
>> Needed to adjust max locked memory RLIMIT_MEMLOCK for testing these bpf samples
>> as these are using more and larger maps than can fit in distro default 64Kbytes limit.
>> 
>> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ...
>> +	struct rlimit r = {1024*1024, RLIM_INFINITY};
> ...
>> +	struct rlimit r = {1024*1024, RLIM_INFINITY};
> 
> why magic numbers?
> All other samples do
> struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};

Let's not do that.

People run these tests often as root, so the safer we make running
these test the better.

A weird magic limit is better than none at all.

^ permalink raw reply

* Re: [net-next PATCH 0/4] Improve bpf ELF-loader under samples/bpf
From: David Miller @ 2017-05-03 13:30 UTC (permalink / raw)
  To: brouer; +Cc: kafai, netdev, eric, borkmann, alexei.starovoitov
In-Reply-To: <149372826543.22268.3617359219409721129.stgit@firesoul>

From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 02 May 2017 14:31:45 +0200

> This series improves and fixes bpf ELF loader and programs under
> samples/bpf.  The bpf_load.c created some hard to debug issues when
> the struct (bpf_map_def) used in the ELF maps section format changed
> in commit fb30d4b71214 ("bpf: Add tests for map-in-map").
> 
> This was hotfixed in commit 409526bea3c3 ("samples/bpf: bpf_load.c
> detect and abort if ELF maps section size is wrong") by detecting the
> issue and aborting the program.
> 
> In most situations the bpf-loader should be able to handle these kind
> of changes to the struct size.  This patch series aim to do proper
> backward and forward compabilility handling when loading ELF files.
> 
> This series also adjust the callback that was introduced in commit
> 9fd63d05f3e8 ("bpf: Allow bpf sample programs (*_user.c) to change
> bpf_map_def") to use the new bpf_map_data structure, before more users
> start to use this callback.
> 
> Hoping these changes can make the merge window, as above mentioned
> commits have not been merged yet, and it would be good to avoid users
> hitting these issues.

I've applied this series, thanks!

^ permalink raw reply

* Re: x86: warning: kernel stack regs has bad 'bp' value
From: Josh Poimboeuf @ 2017-05-03 13:30 UTC (permalink / raw)
  To: Andrey Konovalov
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, LKML,
	Vlad Yasevich, Neil Horman, David S. Miller, linux-sctp, netdev,
	Marcelo Ricardo Leitner, Dmitry Vyukov, Kostya Serebryany,
	syzkaller, Eric Dumazet, Cong Wang
In-Reply-To: <CAAeHK+zapAtjKaTMG7RSxn2P1K=+BK32sfroPToppXHMLD-wpA@mail.gmail.com>

On Wed, May 03, 2017 at 02:48:28PM +0200, Andrey Konovalov wrote:
> Hi,
> 
> I've got the following error report while fuzzing the kernel with syzkaller.
> 
> On commit 89c9fea3c8034cdb2fd745f551cde0b507fd6893 (4.11.0+).
> 
> A reproducer and .config are attached.
> 
> The reproducer open SCTP sockets and sends data to it in a loop.
> I'm not sure whether this is an issue with SCTP or with something else.
> 
> WARNING: kernel stack regs at ffff8800686869f8 in a.out:4933 has bad
> 'bp' value c3fc855a10167ec0

Hi Andrey,

Can you test this patch?


diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 7e48807..45a53df 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic)
 	movq  %r12, 3*8(%rsp)
 	movq  %r14, 4*8(%rsp)
 	movq  %r13, 5*8(%rsp)
-	movq  %rbp, 6*8(%rsp)
+	movq  %r15, 6*8(%rsp)
 
 	movq  %r8, (%rsp)
 	movq  %r9, 1*8(%rsp)
@@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic)
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
-	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
 	source
@@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic)
 	source
 	movq  32(%rdi), %r10
 	source
-	movq  40(%rdi), %rbp
+	movq  40(%rdi), %r15
 	source
 	movq  48(%rdi), %r14
 	source
@@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic)
 	adcq  %r11, %rax
 	adcq  %rdx, %rax
 	adcq  %r10, %rax
-	adcq  %rbp, %rax
+	adcq  %r15, %rax
 	adcq  %r14, %rax
 	adcq  %r13, %rax
 
@@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic)
 	dest
 	movq %r10, 32(%rsi)
 	dest
-	movq %rbp, 40(%rsi)
+	movq %r15, 40(%rsi)
 	dest
 	movq %r14, 48(%rsi)
 	dest
@@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic)
 	movq 3*8(%rsp), %r12
 	movq 4*8(%rsp), %r14
 	movq 5*8(%rsp), %r13
-	movq 6*8(%rsp), %rbp
+	movq 6*8(%rsp), %r15
 	addq $7*8, %rsp
 	ret
 

^ permalink raw reply related

* Re: [PATCH v4 net-next 06/10] net/ncsi: Ethtool operation to get NCSI hw statistics
From: David Miller @ 2017-05-03 13:18 UTC (permalink / raw)
  To: andrew; +Cc: gwshan, netdev, joe, kubakici, f.fainelli
In-Reply-To: <20170503124722.GE8029@lunn.ch>

From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 3 May 2017 14:47:22 +0200

> On Wed, May 03, 2017 at 02:44:37PM +1000, Gavin Shan wrote:
>> This adds ethtool command (ETHTOOL_GNCSISTATS) to retrieve the
>> NCSI hardware statistics.
> 
> Hi Gavin
> 
> I've not been following this patchset, so maybe i'm about to ask a
> question which has already been asked and answered.
> 
> Why cannot use just use ethtool -S for this?

Indeed, when I said to use ethtool for these NCSI hw stats I meant
that the "ethtool -S" be used, not some new ethtool command.

^ permalink raw reply

* RE: [PATCH net v3] driver: veth: Fix one possbile memleak when fail to register_netdevice
From: Gao Feng @ 2017-05-03 13:17 UTC (permalink / raw)
  To: 'Xin Long'
  Cc: 'Gao Feng', 'davem', jarod,
	'Stephen Hemminger', dsa, 'network dev'
In-Reply-To: <CADvbK_dqU7wDQoU2nHBwPkQjzSuiVGXStYBD9iZSvz=j8SGt-w@mail.gmail.com>

> From: Xin Long [mailto:lucien.xin@gmail.com]
> Sent: Wednesday, May 3, 2017 7:26 PM
> On Wed, May 3, 2017 at 2:37 PM, Gao Feng <gfree.wind@foxmail.com> wrote:
> >> From: Xin Long [mailto:lucien.xin@gmail.com]
> >> Sent: Wednesday, May 3, 2017 1:38 PM
> >> On Wed, May 3, 2017 at 10:07 AM, Gao Feng <gfree.wind@foxmail.com>
> >> wrote:
> >> >> From: netdev-owner@vger.kernel.org
> >> >> [mailto:netdev-owner@vger.kernel.org]
> >> >> On Behalf Of Xin Long
> >> >> Sent: Wednesday, May 3, 2017 12:59 AM On Tue, May 2, 2017 at 7:03
> >> >> PM, Gao Feng <gfree.wind@vip.163.com> wrote:
> >> >> >> From: Xin Long [mailto:lucien.xin@gmail.com]
> >> >> >> Sent: Tuesday, May 2, 2017 3:56 PM On Sat, Apr 29, 2017 at
> >> >> >> 11:51 AM,  <gfree.wind@foxmail.com> wrote:
> >> >> >> > From: Gao Feng <gfree.wind@foxmail.com>
> > [...]
> >> > The fix you mentioned change the original logic.
> >> > The dev->vstats is freed in advance in the ndo_uninit, not destructor.
> >> > It may break the backward.
> >> Sorry, I didn't get your "backward"
> >> I can't see there will be any problem caused by it.
> >> can you say this patch also break the 'backward' ?
> >> https://patchwork.ozlabs.org/patch/748964/
> >>
> >> It's really weird to do dev->reg_state check in ndo_unint ndo_unint
> >> is supposed to free the memory alloced in ndo_init.
> >>
> >
> > I am not sure if it would break the backward, so I said it MAY break.
> > I assumed there may be someone would access the dev->vstats after
> > ndo_uninit, because current veth driver free the mem in the destructor.
> > I selected this approach because I don't want to bring new bugs during fix
> bug.
> >
> > If you're sure it is safe to free dev->vstats in ndo_uninit, I would like to
> update it.
> yes, stats are accessed in .ndo_start_xmit waited by synchronize_net() and
> .ndo_get_stats64 protected by rtnl_lock().

Thanks, I would update the series later with your advice.
I need to wait for a while to collect more comments.
 
> >
> > BTW there are too many drivers which have possible memleak.
> > You could find the list by
> https://www.mail-archive.com/netdev@vger.kernel.org/msg166629.html.
> ah, cool.
> I'm not sure about other dev's stuff, have to check them for sure later.

Expect and thanks your reviews:)

Best Regards
Feng

> 
> >
> > Some drivers allocate the resources in ndo_init, free some in ndo_uninit and
> free left in destructor.
> > I think there are some reasons.
> > We could not move all free in the ndo_uninit from destructor. What's your
> opinion?
> >
> > Best Regards
> > Feng
> >
> >
> >

^ permalink raw reply

* Re: [PATCH net] net: ipv6: Do not duplicate DAD on link up
From: Andrey Konovalov @ 2017-05-03 13:09 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, Dmitry Vyukov
In-Reply-To: <1493761424-17407-1-git-send-email-dsahern@gmail.com>

On Tue, May 2, 2017 at 11:43 PM, David Ahern <dsahern@gmail.com> wrote:
> Andrey reported a warning triggered by the rcu code:
>
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 5911 at lib/debugobjects.c:289
> debug_print_object+0x175/0x210
> ODEBUG: activate active (active state 1) object type: rcu_head hint:
>         (null)
> Modules linked in:
> CPU: 1 PID: 5911 Comm: a.out Not tainted 4.11.0-rc8+ #271
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16
>  dump_stack+0x192/0x22d lib/dump_stack.c:52
>  __warn+0x19f/0x1e0 kernel/panic.c:549
>  warn_slowpath_fmt+0xe0/0x120 kernel/panic.c:564
>  debug_print_object+0x175/0x210 lib/debugobjects.c:286
>  debug_object_activate+0x574/0x7e0 lib/debugobjects.c:442
>  debug_rcu_head_queue kernel/rcu/rcu.h:75
>  __call_rcu.constprop.76+0xff/0x9c0 kernel/rcu/tree.c:3229
>  call_rcu_sched+0x12/0x20 kernel/rcu/tree.c:3288
>  rt6_rcu_free net/ipv6/ip6_fib.c:158
>  rt6_release+0x1ea/0x290 net/ipv6/ip6_fib.c:188
>  fib6_del_route net/ipv6/ip6_fib.c:1461
>  fib6_del+0xa42/0xdc0 net/ipv6/ip6_fib.c:1500
>  __ip6_del_rt+0x100/0x160 net/ipv6/route.c:2174
>  ip6_del_rt+0x140/0x1b0 net/ipv6/route.c:2187
>  __ipv6_ifa_notify+0x269/0x780 net/ipv6/addrconf.c:5520
>  addrconf_ifdown+0xe60/0x1a20 net/ipv6/addrconf.c:3672
> ...
>
> Andrey's reproducer program runs in a very tight loop, calling
> 'unshare -n' and then spawning 2 sets of 14 threads running random ioctl
> calls. The relevant networking sequence:
>
> 1. New network namespace created via unshare -n
> - ip6tnl0 device is created in down state
>
> 2. address added to ip6tnl0
> - equivalent to ip -6 addr add dev ip6tnl0 fd00::bb/1
> - DAD is started on the address and when it completes the host
>   route is inserted into the FIB
>
> 3. ip6tnl0 is brought up
> - the new fixup_permanent_addr function restarts DAD on the address
>
> 4. exit namespace
> - teardown / cleanup sequence starts
> - once in a blue moon, lo teardown appears to happen BEFORE teardown
>   of ip6tunl0
>   + down on 'lo' removes the host route from the FIB since the dst->dev
>     for the route is loobback
>   + host route added to rcu callback list
>     * rcu callback has not run yet, so rt is NOT on the gc list so it has
>       NOT been marked obsolete
>
> 5. in parallel to 4. worker_thread runs addrconf_dad_completed
> - DAD on the address on ip6tnl0 completes
> - calls ipv6_ifa_notify which inserts the host route
>
> All of that happens very quickly. The result is that a host route that
> has been deleted from the IPv6 FIB and added to the RCU list is re-inserted
> into the FIB.
>
> The exit namespace eventually gets to cleaning up ip6tnl0 which removes the
> host route from the FIB again, calls the rcu function for cleanup -- and
> triggers the double rcu trace.
>
> The root cause is duplicate DAD on the address -- steps 2 and 3. Arguably,
> DAD should not be started in step 2. The interface is in the down state,
> so it can not really send out requests for the address which makes starting
> DAD pointless.
>
> Since the second DAD was introduced by a recent change, seems appropriate
> to use it for the Fixes tag and have the fixup function only start DAD for
> addresses in the PREDAD state which occurs in addrconf_ifdown if the
> address is retained.

Hi David,

Your patch fixes the issue for me.

I'll keep testing to see if all the reports related to fib6 are gone.

Tested-by: Andrey Konovalov <andreyknvl@google.com>

Thanks!

>
> Big thanks to Andrey for isolating a reliable reproducer for this problem.
> Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
> Reported-by: Andrey Konovalov <andreyknvl@google.com>
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  net/ipv6/addrconf.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 67ec87ea5fb6..4fc4e8634e65 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -3306,7 +3306,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
>                                       idev->dev, 0, 0);
>         }
>
> -       addrconf_dad_start(ifp);
> +       if (ifp->state == INET6_IFADDR_STATE_PREDAD)
> +               addrconf_dad_start(ifp);
>
>         return 0;
>  }
> @@ -3656,7 +3657,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
>                     !addr_is_local(&ifa->addr)) {
>                         /* set state to skip the notifier below */
>                         state = INET6_IFADDR_STATE_DEAD;
> -                       ifa->state = 0;
> +                       ifa->state = INET6_IFADDR_STATE_PREDAD;
>                         if (!(ifa->flags & IFA_F_NODAD))
>                                 ifa->flags |= IFA_F_TENTATIVE;
>
> --
> 2.1.4
>

^ permalink raw reply

* Re: [PATCH v4 net-next 00/10] net/ncsi: Add debugging functionality
From: Andrew Lunn @ 2017-05-03 12:58 UTC (permalink / raw)
  To: Gavin Shan; +Cc: netdev, joe, kubakici, f.fainelli, davem
In-Reply-To: <1493786681-27468-1-git-send-email-gwshan@linux.vnet.ibm.com>

On Wed, May 03, 2017 at 02:44:31PM +1000, Gavin Shan wrote:
> This series supports NCSI debugging infrastructure by adding several
> ethtool commands and one debugfs file. It was inspired by the reported
> issues: No available package and channel are probed successfully.
> Obviously, we don't have a debugging infrastructure for NCSI stack yet.
> 
> The first 3 patches, fixing some issues, aren't relevant to the
> subject. 

...

> PATCH[9,10] fix two issues found by the debugging functionality.

Hi Gavin

If they are real fixes, you should submit them separately. Please
include Fixes: tags, and indicate if they need to be included in
-stable.

David will probably accept fixes while the merge window is closed.

	Andrew

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox