From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wei Yang Subject: Re: [PATCH 3.14-stable] net/mlx4_core: Preserve pci_dev_data after __mlx4_remove_one() Date: Tue, 17 Jun 2014 10:49:51 +0800 Message-ID: <20140617024951.GB7886@richard> References: <1401607475-8367-1-git-send-email-weiyang@linux.vnet.ibm.com> Reply-To: Wei Yang Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: davem@davemloft.net, netdev@vger.kernel.org, Bjorn Helgaas , Amir Vadai , Jack Morgenstein , Or Gerlitz To: Wei Yang Return-path: Received: from e23smtp05.au.ibm.com ([202.81.31.147]:54827 "EHLO e23smtp05.au.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754056AbaFQCt7 (ORCPT ); Mon, 16 Jun 2014 22:49:59 -0400 Received: from /spool/local by e23smtp05.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 17 Jun 2014 12:49:57 +1000 Received: from d23relay05.au.ibm.com (d23relay05.au.ibm.com [9.190.235.152]) by d23dlp02.au.ibm.com (Postfix) with ESMTP id 16CE52BB0040 for ; Tue, 17 Jun 2014 12:49:54 +1000 (EST) Received: from d23av04.au.ibm.com (d23av04.au.ibm.com [9.190.235.139]) by d23relay05.au.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s5H2RjPe46661856 for ; Tue, 17 Jun 2014 12:27:46 +1000 Received: from d23av04.au.ibm.com (localhost [127.0.0.1]) by d23av04.au.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s5H2nqKM016446 for ; Tue, 17 Jun 2014 12:49:53 +1000 Content-Disposition: inline In-Reply-To: <1401607475-8367-1-git-send-email-weiyang@linux.vnet.ibm.com> Sender: netdev-owner@vger.kernel.org List-ID: David, I saw that the fix for the crash during reboot has been merged into mainline, but I am not sure how to check whether these backports have been merged into the stable tree (I am not familiar with how to check the stable tree). Do you suggest that I include that fix and send these backports again, or should I do something else? 
On Sun, Jun 01, 2014 at 03:24:35PM +0800, Wei Yang wrote: >pci_match_id() just match the static pci_device_id, which may return NULL if >someone binds the driver to a device manually using >/sys/bus/pci/drivers/.../new_id. > >This patch wrap up a helper function __mlx4_remove_one() which does the tear >down function but preserve the drv_data. Functions like >mlx4_pci_err_detected() and mlx4_restart_one() will call this one with out >releasing drvdata. > >Fixes: 97a5221 "net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset". > >CC: Bjorn Helgaas >CC: Amir Vadai >CC: Jack Morgenstein >CC: Or Gerlitz >Signed-off-by: Wei Yang >Acked-by: Jack Morgenstein >Signed-off-by: David S. Miller >--- > drivers/net/ethernet/mellanox/mlx4/main.c | 170 ++++++++++++++++------------- > drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + > 2 files changed, 95 insertions(+), 76 deletions(-) > >diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >index d413e60..b29bbe1 100644 >--- a/drivers/net/ethernet/mellanox/mlx4/main.c >+++ b/drivers/net/ethernet/mellanox/mlx4/main.c >@@ -2275,13 +2275,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) > /* Allow large DMA segments, up to the firmware limit of 1 GB */ > dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); > >- priv = kzalloc(sizeof(*priv), GFP_KERNEL); >- if (!priv) { >- err = -ENOMEM; >- goto err_release_regions; >- } >- >- dev = &priv->dev; >+ dev = pci_get_drvdata(pdev); >+ priv = mlx4_priv(dev); > dev->pdev = pdev; > INIT_LIST_HEAD(&priv->ctx_list); > spin_lock_init(&priv->ctx_lock); >@@ -2464,8 +2459,7 @@ slave_start: > mlx4_sense_init(dev); > mlx4_start_sense(dev); > >- priv->pci_dev_data = pci_dev_data; >- pci_set_drvdata(pdev, dev); >+ priv->removed = 0; > > return 0; > >@@ -2531,84 +2525,108 @@ err_disable_pdev: > > static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) > { >+ struct mlx4_priv *priv; >+ struct 
mlx4_dev *dev; >+ > printk_once(KERN_INFO "%s", mlx4_version); > >+ priv = kzalloc(sizeof(*priv), GFP_KERNEL); >+ if (!priv) >+ return -ENOMEM; >+ >+ dev = &priv->dev; >+ pci_set_drvdata(pdev, dev); >+ priv->pci_dev_data = id->driver_data; >+ > return __mlx4_init_one(pdev, id->driver_data); > } > >-static void mlx4_remove_one(struct pci_dev *pdev) >+static void __mlx4_remove_one(struct pci_dev *pdev) > { > struct mlx4_dev *dev = pci_get_drvdata(pdev); > struct mlx4_priv *priv = mlx4_priv(dev); >+ int pci_dev_data; > int p; > >- if (dev) { >- /* in SRIOV it is not allowed to unload the pf's >- * driver while there are alive vf's */ >- if (mlx4_is_master(dev)) { >- if (mlx4_how_many_lives_vf(dev)) >- printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n"); >- } >- mlx4_stop_sense(dev); >- mlx4_unregister_device(dev); >+ if (priv->removed) >+ return; > >- for (p = 1; p <= dev->caps.num_ports; p++) { >- mlx4_cleanup_port_info(&priv->port[p]); >- mlx4_CLOSE_PORT(dev, p); >- } >+ pci_dev_data = priv->pci_dev_data; > >- if (mlx4_is_master(dev)) >- mlx4_free_resource_tracker(dev, >- RES_TR_FREE_SLAVES_ONLY); >- >- mlx4_cleanup_counters_table(dev); >- mlx4_cleanup_qp_table(dev); >- mlx4_cleanup_srq_table(dev); >- mlx4_cleanup_cq_table(dev); >- mlx4_cmd_use_polling(dev); >- mlx4_cleanup_eq_table(dev); >- mlx4_cleanup_mcg_table(dev); >- mlx4_cleanup_mr_table(dev); >- mlx4_cleanup_xrcd_table(dev); >- mlx4_cleanup_pd_table(dev); >+ /* in SRIOV it is not allowed to unload the pf's >+ * driver while there are alive vf's */ >+ if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev)) >+ printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n"); >+ mlx4_stop_sense(dev); >+ mlx4_unregister_device(dev); > >- if (mlx4_is_master(dev)) >- mlx4_free_resource_tracker(dev, >- RES_TR_FREE_STRUCTS_ONLY); >- >- iounmap(priv->kar); >- mlx4_uar_free(dev, &priv->driver_uar); >- mlx4_cleanup_uar_table(dev); >- if (!mlx4_is_slave(dev)) >- mlx4_clear_steering(dev); >- 
mlx4_free_eq_table(dev); >- if (mlx4_is_master(dev)) >- mlx4_multi_func_cleanup(dev); >- mlx4_close_hca(dev); >- if (mlx4_is_slave(dev)) >- mlx4_multi_func_cleanup(dev); >- mlx4_cmd_cleanup(dev); >- >- if (dev->flags & MLX4_FLAG_MSI_X) >- pci_disable_msix(pdev); >- if (dev->flags & MLX4_FLAG_SRIOV) { >- mlx4_warn(dev, "Disabling SR-IOV\n"); >- pci_disable_sriov(pdev); >- } >+ for (p = 1; p <= dev->caps.num_ports; p++) { >+ mlx4_cleanup_port_info(&priv->port[p]); >+ mlx4_CLOSE_PORT(dev, p); >+ } > >- if (!mlx4_is_slave(dev)) >- mlx4_free_ownership(dev); >+ if (mlx4_is_master(dev)) >+ mlx4_free_resource_tracker(dev, >+ RES_TR_FREE_SLAVES_ONLY); >+ >+ mlx4_cleanup_counters_table(dev); >+ mlx4_cleanup_qp_table(dev); >+ mlx4_cleanup_srq_table(dev); >+ mlx4_cleanup_cq_table(dev); >+ mlx4_cmd_use_polling(dev); >+ mlx4_cleanup_eq_table(dev); >+ mlx4_cleanup_mcg_table(dev); >+ mlx4_cleanup_mr_table(dev); >+ mlx4_cleanup_xrcd_table(dev); >+ mlx4_cleanup_pd_table(dev); > >- kfree(dev->caps.qp0_tunnel); >- kfree(dev->caps.qp0_proxy); >- kfree(dev->caps.qp1_tunnel); >- kfree(dev->caps.qp1_proxy); >+ if (mlx4_is_master(dev)) >+ mlx4_free_resource_tracker(dev, >+ RES_TR_FREE_STRUCTS_ONLY); > >- kfree(priv); >- pci_release_regions(pdev); >- pci_disable_device(pdev); >- pci_set_drvdata(pdev, NULL); >+ iounmap(priv->kar); >+ mlx4_uar_free(dev, &priv->driver_uar); >+ mlx4_cleanup_uar_table(dev); >+ if (!mlx4_is_slave(dev)) >+ mlx4_clear_steering(dev); >+ mlx4_free_eq_table(dev); >+ if (mlx4_is_master(dev)) >+ mlx4_multi_func_cleanup(dev); >+ mlx4_close_hca(dev); >+ if (mlx4_is_slave(dev)) >+ mlx4_multi_func_cleanup(dev); >+ mlx4_cmd_cleanup(dev); >+ >+ if (dev->flags & MLX4_FLAG_MSI_X) >+ pci_disable_msix(pdev); >+ if (dev->flags & MLX4_FLAG_SRIOV) { >+ mlx4_warn(dev, "Disabling SR-IOV\n"); >+ pci_disable_sriov(pdev); > } >+ >+ if (!mlx4_is_slave(dev)) >+ mlx4_free_ownership(dev); >+ >+ kfree(dev->caps.qp0_tunnel); >+ kfree(dev->caps.qp0_proxy); >+ kfree(dev->caps.qp1_tunnel); >+ 
kfree(dev->caps.qp1_proxy); >+ >+ pci_release_regions(pdev); >+ pci_disable_device(pdev); >+ memset(priv, 0, sizeof(*priv)); >+ priv->pci_dev_data = pci_dev_data; >+ priv->removed = 1; >+} >+ >+static void mlx4_remove_one(struct pci_dev *pdev) >+{ >+ struct mlx4_dev *dev = pci_get_drvdata(pdev); >+ struct mlx4_priv *priv = mlx4_priv(dev); >+ >+ __mlx4_remove_one(pdev); >+ kfree(priv); >+ pci_set_drvdata(pdev, NULL); > } > > int mlx4_restart_one(struct pci_dev *pdev) >@@ -2618,7 +2636,7 @@ int mlx4_restart_one(struct pci_dev *pdev) > int pci_dev_data; > > pci_dev_data = priv->pci_dev_data; >- mlx4_remove_one(pdev); >+ __mlx4_remove_one(pdev); > return __mlx4_init_one(pdev, pci_dev_data); > } > >@@ -2673,7 +2691,7 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table); > static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, > pci_channel_state_t state) > { >- mlx4_remove_one(pdev); >+ __mlx4_remove_one(pdev); > > return state == pci_channel_io_perm_failure ? > PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; >@@ -2681,11 +2699,11 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, > > static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) > { >- const struct pci_device_id *id; >- int ret; >+ struct mlx4_dev *dev = pci_get_drvdata(pdev); >+ struct mlx4_priv *priv = mlx4_priv(dev); >+ int ret; > >- id = pci_match_id(mlx4_pci_table, pdev); >- ret = __mlx4_init_one(pdev, id->driver_data); >+ ret = __mlx4_init_one(pdev, priv->pci_dev_data); > > return ret ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; > } >diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h >index 7aec6c8..99d7a28 100644 >--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h >+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h >@@ -796,6 +796,7 @@ struct mlx4_priv { > spinlock_t ctx_lock; > > int pci_dev_data; >+ int removed; > > struct list_head pgdir_list; > struct mutex pgdir_mutex; >-- >1.7.9.5 -- Richard Yang Help you, Help me