netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Netanel Belgazal <netanel@annapurnalabs.com>
To: linux-kernel@vger.kernel.org, davem@davemloft.net,
	netdev@vger.kernel.org
Cc: Netanel Belgazal <netanel@annapurnalabs.com>,
	dwmw@amazon.com, zorik@annapurnalabs.com, alex@annapurnalabs.com,
	saeed@annapurnalabs.com, msw@amazon.com, aliguori@amazon.com,
	nafea@annapurnalabs.com, eric.dumazet@gmail.com
Subject: [PATCH V3 net-next 08/14] net/ena: fix potential access to freed memory during device reset
Date: Fri, 27 Jan 2017 00:18:10 +0200	[thread overview]
Message-ID: <1485469096-5271-9-git-send-email-netanel@annapurnalabs.com> (raw)
In-Reply-To: <1485469096-5271-1-git-send-email-netanel@annapurnalabs.com>

If the ena driver detects that the device is not behaving as expected,
it tries to reset the device.
The reset flow calls ena_down, which frees all the resources
the driver allocated, and then it resets the device.

This flow can cause memory corruption if the device is still writing
to the driver's memory space.
To overcome this potential race, move the reset before the device
resources are freed.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 56 +++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index ea3c801..606fb5c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -80,14 +80,18 @@ static void ena_tx_timeout(struct net_device *dev)
 {
 	struct ena_adapter *adapter = netdev_priv(dev);
 
+	/* Change the state of the device to trigger reset
+	 * Check that we are not in the middle or a trigger already
+	 */
+
+	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
 	u64_stats_update_begin(&adapter->syncp);
 	adapter->dev_stats.tx_timeout++;
 	u64_stats_update_end(&adapter->syncp);
 
 	netif_err(adapter, tx_err, dev, "Transmit time out\n");
-
-	/* Change the state of the device to trigger reset */
-	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 }
 
 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
@@ -1109,7 +1113,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 
 	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
 
-	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
 		napi_complete_done(napi, 0);
 		return 0;
 	}
@@ -1698,12 +1703,22 @@ static void ena_down(struct ena_adapter *adapter)
 	adapter->dev_stats.interface_down++;
 	u64_stats_update_end(&adapter->syncp);
 
-	/* After this point the napi handler won't enable the tx queue */
-	ena_napi_disable_all(adapter);
 	netif_carrier_off(adapter->netdev);
 	netif_tx_disable(adapter->netdev);
 
+	/* After this point the napi handler won't enable the tx queue */
+	ena_napi_disable_all(adapter);
+
 	/* After destroy the queue there won't be any new interrupts */
+
+	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
+		int rc;
+
+		rc = ena_com_dev_reset(adapter->ena_dev);
+		if (rc)
+			dev_err(&adapter->pdev->dev, "Device reset failed\n");
+	}
+
 	ena_destroy_all_io_queues(adapter);
 
 	ena_disable_io_intr_sync(adapter);
@@ -2065,6 +2080,14 @@ static void ena_netpoll(struct net_device *netdev)
 	struct ena_adapter *adapter = netdev_priv(netdev);
 	int i;
 
+	/* Dont schedule NAPI if the driver is in the middle of reset
+	 * or netdev is down.
+	 */
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) ||
+	    test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
 	for (i = 0; i < adapter->num_queues; i++)
 		napi_schedule(&adapter->ena_napi[i].napi);
 }
@@ -2451,6 +2474,14 @@ static void ena_fw_reset_device(struct work_struct *work)
 	bool dev_up, wd_state;
 	int rc;
 
+	if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		dev_err(&pdev->dev,
+			"device reset schedule while reset bit is off\n");
+		return;
+	}
+
+	netif_carrier_off(netdev);
+
 	del_timer_sync(&adapter->timer_service);
 
 	rtnl_lock();
@@ -2464,12 +2495,6 @@ static void ena_fw_reset_device(struct work_struct *work)
 	 */
 	ena_close(netdev);
 
-	rc = ena_com_dev_reset(ena_dev);
-	if (rc) {
-		dev_err(&pdev->dev, "Device reset failed\n");
-		goto err;
-	}
-
 	ena_free_mgmnt_irq(adapter);
 
 	ena_disable_msix(adapter);
@@ -2482,6 +2507,8 @@ static void ena_fw_reset_device(struct work_struct *work)
 
 	ena_com_mmio_reg_read_request_destroy(ena_dev);
 
+	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
 	/* Finish with the destroy part. Start the init part */
 
 	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
@@ -2547,6 +2574,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		return;
 
+	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
 	budget = ENA_MONITORED_TX_QUEUES;
 
 	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
@@ -2646,7 +2676,7 @@ static void ena_timer_service(unsigned long data)
 	if (host_info)
 		ena_update_host_info(host_info, adapter->netdev);
 
-	if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
 		netif_err(adapter, drv, adapter->netdev,
 			  "Trigger reset is on\n");
 		ena_dump_stats_to_dmesg(adapter);
-- 
2.7.4

  parent reply	other threads:[~2017-01-26 22:18 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-26 22:18 [PATCH V3 net-next 00/14] Bug Fixes in ENA driver Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 01/14] net/ena: remove ntuple filter support from device feature list Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 02/14] net/ena: fix error handling when probe fails Netanel Belgazal
2017-01-27 23:33   ` Lino Sanfilippo
2017-01-31 22:14     ` Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 03/14] net/ena: fix queues number calculation Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 04/14] net/ena: fix ethtool RSS flow configuration Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 05/14] net/ena: fix RSS default hash configuration Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 06/14] net/ena: fix NULL dereference when removing the driver after device reset failed Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 07/14] net/ena: refactor ena_get_stats64 to be atomic context safe Netanel Belgazal
2017-01-26 22:18 ` Netanel Belgazal [this message]
2017-01-26 22:18 ` [PATCH V3 net-next 09/14] net/ena: use napi_complete_done() return value Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 10/14] net/ena: use READ_ONCE to access completion descriptors Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 11/14] net/ena: reduce the severity of ena printouts Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 12/14] net/ena: change driver's default timeouts Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 13/14] net/ena: change condition for host attribute configuration Netanel Belgazal
2017-01-26 22:18 ` [PATCH V3 net-next 14/14] net/ena: update driver version to 1.1.2 Netanel Belgazal
2017-01-27 16:07 ` [PATCH V3 net-next 00/14] Bug Fixes in ENA driver David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1485469096-5271-9-git-send-email-netanel@annapurnalabs.com \
    --to=netanel@annapurnalabs.com \
    --cc=alex@annapurnalabs.com \
    --cc=aliguori@amazon.com \
    --cc=davem@davemloft.net \
    --cc=dwmw@amazon.com \
    --cc=eric.dumazet@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=msw@amazon.com \
    --cc=nafea@annapurnalabs.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeed@annapurnalabs.com \
    --cc=zorik@annapurnalabs.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).