netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wei Yang <weiyang@linux.vnet.ibm.com>
To: Wei Yang <weiyang@linux.vnet.ibm.com>
Cc: davem@davemloft.net, netdev@vger.kernel.org,
	Bjorn Helgaas <bhelgaas@google.com>,
	Amir Vadai <amirv@mellanox.com>,
	Jack Morgenstein <jackm@dev.mellanox.co.il>,
	Or Gerlitz <ogerlitz@mellanox.com>
Subject: Re: [PATCH 3.14-stable] net/mlx4_core: Preserve pci_dev_data after __mlx4_remove_one()
Date: Tue, 17 Jun 2014 10:49:51 +0800	[thread overview]
Message-ID: <20140617024951.GB7886@richard> (raw)
In-Reply-To: <1401607475-8367-1-git-send-email-weiyang@linux.vnet.ibm.com>

David,

I saw that the fix for the crash during reboot has been merged in mainline, but
I am not sure how to check whether these backports have been merged in the
stable tree (I am not familiar with how to check the stable tree).

Would you suggest that I include that fix and send these backports again? Or is there something else I should do?

On Sun, Jun 01, 2014 at 03:24:35PM +0800, Wei Yang wrote:
>pci_match_id() only matches against the static pci_device_id table, so it may
>return NULL if someone binds the driver to a device manually using
>/sys/bus/pci/drivers/.../new_id.
>
>This patch wraps up a helper function, __mlx4_remove_one(), which does the
>teardown but preserves the drv_data. Functions like
>mlx4_pci_err_detected() and mlx4_restart_one() will call this one without
>releasing drvdata.
>
>Fixes: 97a5221 "net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset".
>
>CC: Bjorn Helgaas <bhelgaas@google.com>
>CC: Amir Vadai <amirv@mellanox.com>
>CC: Jack Morgenstein <jackm@dev.mellanox.co.il>
>CC: Or Gerlitz <ogerlitz@mellanox.com>
>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>Acked-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
>Signed-off-by: David S. Miller <davem@davemloft.net>
>---
> drivers/net/ethernet/mellanox/mlx4/main.c |  170 ++++++++++++++++-------------
> drivers/net/ethernet/mellanox/mlx4/mlx4.h |    1 +
> 2 files changed, 95 insertions(+), 76 deletions(-)
>
>diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
>index d413e60..b29bbe1 100644
>--- a/drivers/net/ethernet/mellanox/mlx4/main.c
>+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
>@@ -2275,13 +2275,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
> 	/* Allow large DMA segments, up to the firmware limit of 1 GB */
> 	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
>
>-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>-	if (!priv) {
>-		err = -ENOMEM;
>-		goto err_release_regions;
>-	}
>-
>-	dev       = &priv->dev;
>+	dev       = pci_get_drvdata(pdev);
>+	priv      = mlx4_priv(dev);
> 	dev->pdev = pdev;
> 	INIT_LIST_HEAD(&priv->ctx_list);
> 	spin_lock_init(&priv->ctx_lock);
>@@ -2464,8 +2459,7 @@ slave_start:
> 	mlx4_sense_init(dev);
> 	mlx4_start_sense(dev);
>
>-	priv->pci_dev_data = pci_dev_data;
>-	pci_set_drvdata(pdev, dev);
>+	priv->removed = 0;
>
> 	return 0;
>
>@@ -2531,84 +2525,108 @@ err_disable_pdev:
>
> static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
> {
>+	struct mlx4_priv *priv;
>+	struct mlx4_dev *dev;
>+
> 	printk_once(KERN_INFO "%s", mlx4_version);
>
>+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>+	if (!priv)
>+		return -ENOMEM;
>+
>+	dev       = &priv->dev;
>+	pci_set_drvdata(pdev, dev);
>+	priv->pci_dev_data = id->driver_data;
>+
> 	return __mlx4_init_one(pdev, id->driver_data);
> }
>
>-static void mlx4_remove_one(struct pci_dev *pdev)
>+static void __mlx4_remove_one(struct pci_dev *pdev)
> {
> 	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
> 	struct mlx4_priv *priv = mlx4_priv(dev);
>+	int               pci_dev_data;
> 	int p;
>
>-	if (dev) {
>-		/* in SRIOV it is not allowed to unload the pf's
>-		 * driver while there are alive vf's */
>-		if (mlx4_is_master(dev)) {
>-			if (mlx4_how_many_lives_vf(dev))
>-				printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
>-		}
>-		mlx4_stop_sense(dev);
>-		mlx4_unregister_device(dev);
>+	if (priv->removed)
>+		return;
>
>-		for (p = 1; p <= dev->caps.num_ports; p++) {
>-			mlx4_cleanup_port_info(&priv->port[p]);
>-			mlx4_CLOSE_PORT(dev, p);
>-		}
>+	pci_dev_data = priv->pci_dev_data;
>
>-		if (mlx4_is_master(dev))
>-			mlx4_free_resource_tracker(dev,
>-						   RES_TR_FREE_SLAVES_ONLY);
>-
>-		mlx4_cleanup_counters_table(dev);
>-		mlx4_cleanup_qp_table(dev);
>-		mlx4_cleanup_srq_table(dev);
>-		mlx4_cleanup_cq_table(dev);
>-		mlx4_cmd_use_polling(dev);
>-		mlx4_cleanup_eq_table(dev);
>-		mlx4_cleanup_mcg_table(dev);
>-		mlx4_cleanup_mr_table(dev);
>-		mlx4_cleanup_xrcd_table(dev);
>-		mlx4_cleanup_pd_table(dev);
>+	/* in SRIOV it is not allowed to unload the pf's
>+	 * driver while there are alive vf's */
>+	if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev))
>+		printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
>+	mlx4_stop_sense(dev);
>+	mlx4_unregister_device(dev);
>
>-		if (mlx4_is_master(dev))
>-			mlx4_free_resource_tracker(dev,
>-						   RES_TR_FREE_STRUCTS_ONLY);
>-
>-		iounmap(priv->kar);
>-		mlx4_uar_free(dev, &priv->driver_uar);
>-		mlx4_cleanup_uar_table(dev);
>-		if (!mlx4_is_slave(dev))
>-			mlx4_clear_steering(dev);
>-		mlx4_free_eq_table(dev);
>-		if (mlx4_is_master(dev))
>-			mlx4_multi_func_cleanup(dev);
>-		mlx4_close_hca(dev);
>-		if (mlx4_is_slave(dev))
>-			mlx4_multi_func_cleanup(dev);
>-		mlx4_cmd_cleanup(dev);
>-
>-		if (dev->flags & MLX4_FLAG_MSI_X)
>-			pci_disable_msix(pdev);
>-		if (dev->flags & MLX4_FLAG_SRIOV) {
>-			mlx4_warn(dev, "Disabling SR-IOV\n");
>-			pci_disable_sriov(pdev);
>-		}
>+	for (p = 1; p <= dev->caps.num_ports; p++) {
>+		mlx4_cleanup_port_info(&priv->port[p]);
>+		mlx4_CLOSE_PORT(dev, p);
>+	}
>
>-		if (!mlx4_is_slave(dev))
>-			mlx4_free_ownership(dev);
>+	if (mlx4_is_master(dev))
>+		mlx4_free_resource_tracker(dev,
>+					   RES_TR_FREE_SLAVES_ONLY);
>+
>+	mlx4_cleanup_counters_table(dev);
>+	mlx4_cleanup_qp_table(dev);
>+	mlx4_cleanup_srq_table(dev);
>+	mlx4_cleanup_cq_table(dev);
>+	mlx4_cmd_use_polling(dev);
>+	mlx4_cleanup_eq_table(dev);
>+	mlx4_cleanup_mcg_table(dev);
>+	mlx4_cleanup_mr_table(dev);
>+	mlx4_cleanup_xrcd_table(dev);
>+	mlx4_cleanup_pd_table(dev);
>
>-		kfree(dev->caps.qp0_tunnel);
>-		kfree(dev->caps.qp0_proxy);
>-		kfree(dev->caps.qp1_tunnel);
>-		kfree(dev->caps.qp1_proxy);
>+	if (mlx4_is_master(dev))
>+		mlx4_free_resource_tracker(dev,
>+					   RES_TR_FREE_STRUCTS_ONLY);
>
>-		kfree(priv);
>-		pci_release_regions(pdev);
>-		pci_disable_device(pdev);
>-		pci_set_drvdata(pdev, NULL);
>+	iounmap(priv->kar);
>+	mlx4_uar_free(dev, &priv->driver_uar);
>+	mlx4_cleanup_uar_table(dev);
>+	if (!mlx4_is_slave(dev))
>+		mlx4_clear_steering(dev);
>+	mlx4_free_eq_table(dev);
>+	if (mlx4_is_master(dev))
>+		mlx4_multi_func_cleanup(dev);
>+	mlx4_close_hca(dev);
>+	if (mlx4_is_slave(dev))
>+		mlx4_multi_func_cleanup(dev);
>+	mlx4_cmd_cleanup(dev);
>+
>+	if (dev->flags & MLX4_FLAG_MSI_X)
>+		pci_disable_msix(pdev);
>+	if (dev->flags & MLX4_FLAG_SRIOV) {
>+		mlx4_warn(dev, "Disabling SR-IOV\n");
>+		pci_disable_sriov(pdev);
> 	}
>+
>+	if (!mlx4_is_slave(dev))
>+		mlx4_free_ownership(dev);
>+
>+	kfree(dev->caps.qp0_tunnel);
>+	kfree(dev->caps.qp0_proxy);
>+	kfree(dev->caps.qp1_tunnel);
>+	kfree(dev->caps.qp1_proxy);
>+
>+	pci_release_regions(pdev);
>+	pci_disable_device(pdev);
>+	memset(priv, 0, sizeof(*priv));
>+	priv->pci_dev_data = pci_dev_data;
>+	priv->removed = 1;
>+}
>+
>+static void mlx4_remove_one(struct pci_dev *pdev)
>+{
>+	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
>+	struct mlx4_priv *priv = mlx4_priv(dev);
>+
>+	__mlx4_remove_one(pdev);
>+	kfree(priv);
>+	pci_set_drvdata(pdev, NULL);
> }
>
> int mlx4_restart_one(struct pci_dev *pdev)
>@@ -2618,7 +2636,7 @@ int mlx4_restart_one(struct pci_dev *pdev)
> 	int		  pci_dev_data;
>
> 	pci_dev_data = priv->pci_dev_data;
>-	mlx4_remove_one(pdev);
>+	__mlx4_remove_one(pdev);
> 	return __mlx4_init_one(pdev, pci_dev_data);
> }
>
>@@ -2673,7 +2691,7 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
> static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
> 					      pci_channel_state_t state)
> {
>-	mlx4_remove_one(pdev);
>+	__mlx4_remove_one(pdev);
>
> 	return state == pci_channel_io_perm_failure ?
> 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
>@@ -2681,11 +2699,11 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
>
> static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
> {
>-	const struct pci_device_id *id;
>-	int ret;
>+	struct mlx4_dev	 *dev  = pci_get_drvdata(pdev);
>+	struct mlx4_priv *priv = mlx4_priv(dev);
>+	int               ret;
>
>-	id = pci_match_id(mlx4_pci_table, pdev);
>-	ret = __mlx4_init_one(pdev, id->driver_data);
>+	ret = __mlx4_init_one(pdev, priv->pci_dev_data);
>
> 	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
> }
>diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>index 7aec6c8..99d7a28 100644
>--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>@@ -796,6 +796,7 @@ struct mlx4_priv {
> 	spinlock_t		ctx_lock;
>
> 	int			pci_dev_data;
>+	int                     removed;
>
> 	struct list_head        pgdir_list;
> 	struct mutex            pgdir_mutex;
>-- 
>1.7.9.5

-- 
Richard Yang
Help you, Help me

  parent reply	other threads:[~2014-06-17  2:49 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-01  7:24 [PATCH 3.14-stable] net/mlx4_core: Preserve pci_dev_data after __mlx4_remove_one() Wei Yang
2014-06-01  7:38 ` Wei Yang
2014-06-01  9:30   ` Or Gerlitz
2014-06-01  9:36     ` Or Gerlitz
2014-06-01 10:52       ` Or Gerlitz
2014-06-02 13:53     ` Wei Yang
2014-06-03  8:43       ` Or Gerlitz
2014-06-04  1:44         ` Wei Yang
2014-06-17  2:49 ` Wei Yang [this message]
2014-06-17  3:03   ` David Miller
2014-06-17  3:08     ` Wei Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140617024951.GB7886@richard \
    --to=weiyang@linux.vnet.ibm.com \
    --cc=amirv@mellanox.com \
    --cc=bhelgaas@google.com \
    --cc=davem@davemloft.net \
    --cc=jackm@dev.mellanox.co.il \
    --cc=netdev@vger.kernel.org \
    --cc=ogerlitz@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).