public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Alex Estrin <alex.estrin-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Subject: [PATCH v2] IB/ipoib: fix for rare multicast join race condition.
Date: Sat, 06 Feb 2016 08:50:41 -0500	[thread overview]
Message-ID: <20160206135041.11630.77019.stgit@phlsvlogin03.ph.intel.com> (raw)

A narrow window for race condition still exist between
multicast join thread and *dev_flush workers.
A kernel crash caused by prolong erratic link state changes
was observed (most likely a faulty cabling):

[167275.656270] BUG: unable to handle kernel NULL pointer dereference at
0000000000000020
[167275.665973] IP: [<ffffffffa05f8f2e>] ipoib_mcast_join+0xae/0x1d0 [ib_ipoib]
[167275.674443] PGD 0
[167275.677373] Oops: 0000 [#1] SMP
...
[167275.977530] Call Trace:
[167275.982225]  [<ffffffffa05f92f0>] ? ipoib_mcast_free+0x200/0x200 [ib_ipoib]
[167275.992024]  [<ffffffffa05fa1b7>] ipoib_mcast_join_task+0x2a7/0x490
[ib_ipoib]
[167276.002149]  [<ffffffff8109d5fb>] process_one_work+0x17b/0x470
[167276.010754]  [<ffffffff8109e3cb>] worker_thread+0x11b/0x400
[167276.019088]  [<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
[167276.027737]  [<ffffffff810a5aef>] kthread+0xcf/0xe0
Here was a hit spot:
ipoib_mcast_join() {
..............
      rec.qkey      = priv->broadcast->mcmember.qkey;
                                       ^^^^^^^
.....
 }
Proposed patch should prevent multicast join task to continue
if link state change is detected.

Signed-off-by: Alex Estrin <alex.estrin-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---

Changes from v1:
No need to lock again if error detected.
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   14 ++++++++++++--
 1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 050dfa1..3ce2f0a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -456,7 +456,7 @@ out_locked:
 	return status;
 }
 
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_sa_multicast *multicast;
@@ -466,6 +466,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 	ib_sa_comp_mask comp_mask;
 	int ret = 0;
 
+	if (!priv->broadcast)
+		return -EINVAL;
+
 	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
 
 	rec.mgid     = mcast->mcmember.mgid;
@@ -539,6 +542,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 		spin_unlock_irq(&priv->lock);
 		complete(&mcast->done);
 	}
+	return 0;
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -611,6 +615,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	 * and attached
 	 */
 	list_for_each_entry(mcast, &priv->multicast_list, list) {
+		if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
+			mcast = NULL;
+			delay_until = 0;
+			goto out;
+		}
 		if (IS_ERR_OR_NULL(mcast->mc) &&
 		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
 		    (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
@@ -621,7 +630,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
 				init_completion(&mcast->done);
 				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 				spin_unlock_irq(&priv->lock);
-				ipoib_mcast_join(dev, mcast);
+				if (ipoib_mcast_join(dev, mcast) != 0)
+					return;
 				spin_lock_irq(&priv->lock);
 			} else if (!delay_until ||
 				 time_before(mcast->delay_until, delay_until))

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

             reply	other threads:[~2016-02-06 13:50 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-06 13:50 Alex Estrin [this message]
     [not found] ` <20160206135041.11630.77019.stgit-u2TXY/5TJkdZ7WVY1cDZ9q2pdiUAq4bhAL8bYrjMMd8@public.gmane.org>
2016-02-06 17:18   ` [PATCH v2] IB/ipoib: fix for rare multicast join race condition Leon Romanovsky
     [not found]     ` <20160206171828.GD8584-2ukJVAZIZ/Y@public.gmane.org>
2016-02-08 12:23       ` Estrin, Alex
     [not found]         ` <F3529576D8E232409F431C309E29399328F6C7FE-8k97q/ur5Z1cIJlls4ac1rfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2016-02-08 13:22           ` leon-2ukJVAZIZ/Y
2016-02-08 13:34   ` Erez Shitrit
     [not found]     ` <CAAk-MO8b_bpb=cd0Ki7wu3sUSG_rC+fS0sxhmdn5cA-91WfR4g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-02-08 16:43       ` Estrin, Alex
     [not found]         ` <F3529576D8E232409F431C309E29399328F6C892-8k97q/ur5Z1cIJlls4ac1rfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2016-02-09  7:18           ` Leon Romanovsky
     [not found]             ` <20160209071847.GA14741-2ukJVAZIZ/Y@public.gmane.org>
2016-02-09 12:06               ` Estrin, Alex

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160206135041.11630.77019.stgit@phlsvlogin03.ph.intel.com \
    --to=alex.estrin-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox