From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org ([140.211.169.12]:60609 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755820AbcIUIGG (ORCPT ); Wed, 21 Sep 2016 04:06:06 -0400 Subject: Patch "net/mlx5: Fix pci error recovery flow" has been added to the 4.7-stable tree To: mohamad@mellanox.com, davem@davemloft.net, gregkh@linuxfoundation.org, saeedm@mellanox.com Cc: , From: Date: Wed, 21 Sep 2016 10:05:47 +0200 Message-ID: <147444514719447@kroah.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ANSI_X3.4-1968 Content-Transfer-Encoding: 8bit Sender: stable-owner@vger.kernel.org List-ID: This is a note to let you know that I've just added the patch titled net/mlx5: Fix pci error recovery flow to the 4.7-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: net-mlx5-fix-pci-error-recovery-flow.patch and it can be found in the queue-4.7 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let know about it. >>From foo@baz Wed Sep 21 10:05:18 CEST 2016 From: Mohamad Haj Yahia Date: Thu, 18 Aug 2016 21:09:04 +0300 Subject: net/mlx5: Fix pci error recovery flow From: Mohamad Haj Yahia [ Upstream commit 1061c90f524963a0a90e7d2f9a6bfa666458af51 ] When PCI error is detected we should save the state of the pci prior to disabling it. Also when receiving pci slot reset call we need to verify that the device is responsive. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 59 ++++++++++++------------- 1 file changed, 29 insertions(+), 30 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1392,36 +1392,12 @@ static pci_ers_result_t mlx5_pci_err_det dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv); + pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } -static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err = 0; - - dev_info(&pdev->dev, "%s was called\n", __func__); - - err = mlx5_pci_enable_device(dev); - if (err) { - dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" - , __func__, err); - return PCI_ERS_RESULT_DISCONNECT; - } - pci_set_master(pdev); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; -} - -void mlx5_disable_device(struct mlx5_core_dev *dev) -{ - mlx5_pci_err_detected(dev->pdev, 0); -} - /* wait for the device to show vital signs by waiting * for the health counter to start counting. */ @@ -1449,21 +1425,44 @@ static int wait_vital(struct pci_dev *pd return -ETIMEDOUT; } -static void mlx5_pci_resume(struct pci_dev *pdev) +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_priv *priv = &dev->priv; int err; dev_info(&pdev->dev, "%s was called\n", __func__); - pci_save_state(pdev); - err = wait_vital(pdev); + err = mlx5_pci_enable_device(dev); if (err) { + dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" + , __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + + if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); - return; + return PCI_ERS_RESULT_DISCONNECT; } + return PCI_ERS_RESULT_RECOVERED; +} + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + err = mlx5_load_one(dev, priv); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" Patches currently in stable-queue which might be from mohamad@mellanox.com are queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch