public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Salil Mehta <salil.mehta@huawei.com>
To: <davem@davemloft.net>
Cc: <salil.mehta@huawei.com>, <yisen.zhuang@huawei.com>,
	<lipeng321@huawei.com>, <mehta.salil@opnsrc.net>,
	<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linuxarm@huawei.com>, Shiju Jose <shiju.jose@huawei.com>
Subject: [PATCH net-next 2/7] net: hns3: Add PCIe AER error recovery
Date: Fri, 19 Oct 2018 20:15:27 +0100	[thread overview]
Message-ID: <20181019191532.10088-3-salil.mehta@huawei.com> (raw)
In-Reply-To: <20181019191532.10088-1-salil.mehta@huawei.com>

From: Shiju Jose <shiju.jose@huawei.com>

This patch adds PCIe AER error recovery for the HNS3 hw errors by implementing
the slot_reset callback, which requests a reset through the reset_event op.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h          |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c      | 20 +++++++++++++++++++-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c  | 17 +++++++++++++----
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c    |  3 ++-
 4 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 2af3a2d..e82e4ca 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -402,7 +402,7 @@ struct hnae3_ae_ops {
 	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
 				  u16 vlan, u8 qos, __be16 proto);
 	int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
-	void (*reset_event)(struct hnae3_handle *handle);
+	void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle);
 	void (*get_channels)(struct hnae3_handle *handle,
 			     struct ethtool_channels *ch);
 	void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 3c6fa39..32f3aca8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -9,6 +9,7 @@
 #include <linux/ipv6.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/aer.h>
 #include <linux/skbuff.h>
 #include <linux/sctp.h>
 #include <linux/vermagic.h>
@@ -1613,7 +1614,7 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
 
 	/* request the reset */
 	if (h->ae_algo->ops->reset_event)
-		h->ae_algo->ops->reset_event(h);
+		h->ae_algo->ops->reset_event(h->pdev, h);
 }
 
 static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -1796,8 +1797,25 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
 	return ret;
 }
 
+static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+
+	dev_info(dev, "requesting reset due to PCI error\n");
+
+	/* request the reset */
+	if (ae_dev->ops->reset_event) {
+		ae_dev->ops->reset_event(pdev, NULL);
+		return PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
 static const struct pci_error_handlers hns3_err_handler = {
 	.error_detected = hns3_error_detected,
+	.slot_reset     = hns3_slot_reset,
 };
 
 static struct pci_driver hns3_driver = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 94d3678..5075365 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2489,12 +2489,18 @@ static void hclge_reset(struct hclge_dev *hdev)
 	ae_dev->reset_type = HNAE3_NONE_RESET;
 }
 
-static void hclge_reset_event(struct hnae3_handle *handle)
+static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+	struct hclge_dev *hdev = ae_dev->priv;
 
-	/* check if this is a new reset request and we are not here just because
+	/* We might end up getting called broadly because of the 2 cases below:
+	 * 1. Recoverable error was conveyed through APEI and only way to bring
+	 *    normalcy is to reset.
+	 * 2. A new reset request from the stack due to timeout
+	 *
+	 * For the first case, error event might not have ae handle available.
+	 * Check if this is a new reset request and we are not here just because
 	 * last reset attempt did not succeed and watchdog hit us again. We will
 	 * know this if last reset request did not occur very recently (watchdog
 	 * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
@@ -2503,6 +2509,9 @@ static void hclge_reset_event(struct hnae3_handle *handle)
 	 * want to make sure we throttle the reset request. Therefore, we will
 	 * not allow it again before 3*HZ times.
 	 */
+	if (!handle)
+		handle = &hdev->vport[0].nic;
+
 	if (time_before(jiffies, (handle->last_reset_time + 3 * HZ)))
 		return;
 	else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index ac67fec..e0a86a5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1214,7 +1214,8 @@ static int hclgevf_do_reset(struct hclgevf_dev *hdev)
 	return status;
 }
 
-static void hclgevf_reset_event(struct hnae3_handle *handle)
+static void hclgevf_reset_event(struct pci_dev *pdev,
+				struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
-- 
2.7.4

  parent reply	other threads:[~2018-10-19 19:15 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-19 19:15 [PATCH net-next 0/7] Adds support of RAS Error Handling in HNS3 Driver Salil Mehta
2018-10-19 19:15 ` [PATCH net-next 1/7] net: hns3: Add PCIe AER callback error_detected Salil Mehta
2018-10-19 19:15 ` Salil Mehta [this message]
2018-10-19 19:15 ` [PATCH net-next 3/7] net: hns3: Add support to enable and disable hw errors Salil Mehta
2018-10-19 19:15 ` [PATCH net-next 4/7] net: hns3: Add enable and process common ecc errors Salil Mehta
2018-10-19 19:15 ` [PATCH net-next 5/7] net: hns3: Add enable and process hw errors from IGU, EGU and NCSI Salil Mehta
2018-10-19 19:15 ` [PATCH net-next 6/7] net: hns3: Add enable and process hw errors from PPP Salil Mehta
2018-10-23 11:28   ` Dan Carpenter
2018-10-23 21:37     ` Salil Mehta
2018-10-24  6:19       ` Dan Carpenter
2018-10-24  6:22         ` Salil Mehta
2018-10-19 19:15 ` [PATCH net-next 7/7] net: hns3: Add enable and process hw errors of TM scheduler Salil Mehta
2018-10-23  2:32 ` [PATCH net-next 0/7] Adds support of RAS Error Handling in HNS3 Driver David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181019191532.10088-3-salil.mehta@huawei.com \
    --to=salil.mehta@huawei.com \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    --cc=lipeng321@huawei.com \
    --cc=mehta.salil@opnsrc.net \
    --cc=netdev@vger.kernel.org \
    --cc=shiju.jose@huawei.com \
    --cc=yisen.zhuang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox