All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Erez Shitrit <erezsh-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: [rdma-next v2 20/23] IB/ipoib: Sync between remove_one to sysfs calls that use rtnl_lock
Date: Tue, 15 Aug 2017 11:54:49 +0300	[thread overview]
Message-ID: <20170815085452.3546-21-leon@kernel.org> (raw)
In-Reply-To: <20170815085452.3546-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

From: Erez Shitrit <erezsh-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

In order to avoid a deadlock between sysfs functions (such as create/delete
child) and remove_one (both of which use the sysfs lock and rtnl_lock),
the driver will use a state mutex for synchronization.

This fixes traces such as the following:
schedule+0x3e/0x90
kernfs_drain+0x75/0xf0
? wait_woken+0x90/0x90
__kernfs_remove+0x12e/0x1c0
kernfs_remove+0x25/0x40
sysfs_remove_dir+0x57/0x90
kobject_del+0x22/0x60
device_del+0x195/0x230
 pm_runtime_set_memalloc_noio+0xac/0xf0
netdev_unregister_kobject+0x71/0x80
rollback_registered_many+0x205/0x2f0
rollback_registered+0x31/0x40
unregister_netdevice_queue+0x58/0xb0
unregister_netdev+0x20/0x30
ipoib_remove_one+0xb7/0x240 [ib_ipoib]
ib_unregister_device+0xbc/0x1b0 [ib_core]
ib_unregister_mad_agent+0x29/0x30 [ib_core]
mlx4_ib_remove+0x67/0x280 [mlx4_ib]
INFO: task echo:24082 blocked for more than 120 seconds.
Tainted: G           OE   4.1.12-37.5.1.el6uek.x86_64 #2
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
message.
Call Trace:
schedule+0x3e/0x90
schedule_preempt_disabled+0xe/0x10
__mutex_lock_slowpath+0x95/0x110
? _rcu_barrier+0x177/0x220
mutex_lock+0x23/0x40
rtnl_lock+0x15/0x20
netdev_run_todo+0x81/0x1f0
rtnl_unlock+0xe/0x10
ipoib_vlan_delete+0x12f/0x1c0 [ib_ipoib]
delete_child+0x69/0x80 [ib_ipoib]
dev_attr_store+0x20/0x30
sysfs_kf_write+0x41/0x50

Signed-off-by: Erez Shitrit <erezsh-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Alex Vesker <valex-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib.h      |  1 +
 drivers/infiniband/ulp/ipoib/ipoib_cm.c   |  8 +++++++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  5 +++++
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 22 ++++++++++++++++++----
 4 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 9e738104c2a1..7aa114cf53e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,7 @@ struct ipoib_dev_priv {
 	unsigned long flags;
 
 	struct rw_semaphore vlan_rwsem;
+	struct mutex        sysfs_lock; /* Protect sysfs running*/
 
 	struct rb_root  path_tree;
 	struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index f87d104837dc..85ef9f10bdee 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1507,9 +1507,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 	if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
 		return -EPERM;
 
-	if (!rtnl_trylock())
+	if (!mutex_trylock(&priv->sysfs_lock))
 		return restart_syscall();
 
+	if (!rtnl_trylock()) {
+		mutex_unlock(&priv->sysfs_lock);
+		return restart_syscall();
+	}
+
 	ret = ipoib_set_mode(dev, buf);
 
 	/* The assumption is that the function ipoib_set_mode returned
@@ -1518,6 +1523,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 	 */
 	if (ret != -EBUSY)
 		rtnl_unlock();
+	mutex_unlock(&priv->sysfs_lock);
 
 	return (!ret || ret == -EBUSY) ? count : ret;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 645217232250..627ff8fb59b0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1876,6 +1876,7 @@ static void ipoib_build_priv(struct net_device *dev)
 	priv->dev = dev;
 	spin_lock_init(&priv->lock);
 	init_rwsem(&priv->vlan_rwsem);
+	mutex_init(&priv->sysfs_lock);
 
 	INIT_LIST_HEAD(&priv->path_list);
 	INIT_LIST_HEAD(&priv->child_intfs);
@@ -2325,7 +2326,11 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		cancel_delayed_work(&priv->neigh_reap_task);
 		flush_workqueue(priv->wq);
 
+		/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
+		mutex_lock(&priv->sysfs_lock);
 		unregister_netdev(priv->dev);
+		mutex_unlock(&priv->sysfs_lock);
+
 		rn->free_rdma_netdev(priv->dev);
 
 		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 081b33deff1b..dd46abcf53c2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -133,12 +133,20 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
 
-	if (!rtnl_trylock())
+	if (!mutex_trylock(&ppriv->sysfs_lock))
 		return restart_syscall();
 
+	if (!rtnl_trylock()) {
+		mutex_unlock(&ppriv->sysfs_lock);
+		return restart_syscall();
+	}
+
 	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
-	if (!priv)
+	if (!priv) {
+		rtnl_unlock();
+		mutex_unlock(&ppriv->sysfs_lock);
 		return -ENOMEM;
+	}
 
 	down_write(&ppriv->vlan_rwsem);
 
@@ -164,8 +172,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 
 out:
 	up_write(&ppriv->vlan_rwsem);
-
 	rtnl_unlock();
+	mutex_unlock(&ppriv->sysfs_lock);
 
 	if (result) {
 		free_netdev(priv->dev);
@@ -188,8 +196,13 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
 		return -EPERM;
 
-	if (!rtnl_trylock())
+	if (!mutex_trylock(&ppriv->sysfs_lock))
+		return restart_syscall();
+
+	if (!rtnl_trylock()) {
+		mutex_unlock(&ppriv->sysfs_lock);
 		return restart_syscall();
+	}
 
 	down_write(&ppriv->vlan_rwsem);
 	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
@@ -208,6 +221,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	}
 
 	rtnl_unlock();
+	mutex_unlock(&ppriv->sysfs_lock);
 
 	if (dev) {
 		free_netdev(dev);
-- 
2.14.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-08-15  8:54 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-15  8:54 [pull request][rdma-next v2 00/23] RDMA core, drivers and IPoIB fixes Leon Romanovsky
     [not found] ` <20170815085452.3546-1-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-08-15  8:54   ` [rdma-next v2 01/23] IB/cma: Fix erroneous validation of supported default GID type Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 02/23] IB/uverbs: Introduce and use helper functions to copy ah attributes Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 03/23] RDMA/mlx4: Don't use uninitialized variable Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 04/23] RDMA/mlx4: Fix create qp command alignment Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 05/23] RDMA/(core,ulp): Convert register/unregister event handler to be void Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 06/23] RDMA/core: Cleanup device capability enum Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 07/23] RDMA/core: Remove unimplemented node_types and node transport Leon Romanovsky
     [not found]     ` <20170815085452.3546-8-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-08-15 18:25       ` Suri Shelvapille
     [not found]         ` <SN1PR0301MB2127376FC9B3D170C878A47BDE8D0-VLIetriUNob/KlJXleiaIJwN6zqB+hSMnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-08-16  5:37           ` Leon Romanovsky
     [not found]             ` <20170816053744.GD24282-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-16 14:30               ` Suri Shelvapille
     [not found]                 ` <SN1PR0301MB21274177B67EE02B96F14424DE820-VLIetriUNob/KlJXleiaIJwN6zqB+hSMnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-08-16 16:05                   ` Devesh Sharma
2017-08-16 16:21                   ` Leon Romanovsky
     [not found]                     ` <20170816162103.GT24282-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-16 18:53                       ` Leon Romanovsky
     [not found]                 ` <CANjDDBh=3=5pwYO0_hcZDYVe2H98NMdTT_KrHz7j2Hm74ftDjw@mail.gmail.com>
     [not found]                   ` <CANjDDBh=3=5pwYO0_hcZDYVe2H98NMdTT_KrHz7j2Hm74ftDjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-16 16:26                     ` Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 08/23] RDMA/core: Delete BUG() from unreachable flow Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 09/23] RDMA/core: Refactor get link layer wrapper Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 10/23] RDMA/mlx4: Remove gfp_mask argument from acquire_group call Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 11/23] RDMA/usnic: Fix remove address space warning Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 12/23] RDMA/mthca: Make explicit conversion to 64bit value Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 13/23] IB/mlx4: Fix some spelling mistakes Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 14/23] IB/mlx5: " Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 15/23] IB/mlx5: Add necessary delay drop assignment Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 16/23] IB/mlx4: Fix RSS QP type in creation verb Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 17/23] IB/mlx4: Fix struct mlx4_ib_create_wq alignment Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 18/23] IB/mlx4: Remove redundant attribute in mlx4_ib_create_qp_rss struct Leon Romanovsky
     [not found]     ` <20170815085452.3546-19-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2017-08-22 20:09       ` Doug Ledford
     [not found]         ` <1503432590.78641.10.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-08-23  4:43           ` Leon Romanovsky
     [not found]             ` <20170823044354.GJ1724-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-24  9:22               ` Leon Romanovsky
     [not found]                 ` <20170824092228.GF1724-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-08-24 19:40                   ` Doug Ledford
2017-08-15  8:54   ` [rdma-next v2 19/23] IB/mlx4: Check that reserved fields in mlx4_ib_create_qp_rss are zero Leon Romanovsky
2017-08-15  8:54   ` Leon Romanovsky [this message]
2017-08-15  8:54   ` [rdma-next v2 21/23] IB/ipoib: Add get statistics support to SRIOV VF Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 22/23] IB/rxe: Make rxe_counter_name static Leon Romanovsky
2017-08-15  8:54   ` [rdma-next v2 23/23] RDMA/mlx5: Limit scope of get vector affinity local function Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170815085452.3546-21-leon@kernel.org \
    --to=leon-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=erezsh-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.