All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon-2ukJVAZIZ/Y@public.gmane.org>
To: Alex Estrin <alex.estrin-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Subject: Re: [PATCH v2] IB/ipoib: fix for rare multicast join race condition.
Date: Sat, 6 Feb 2016 19:18:28 +0200	[thread overview]
Message-ID: <20160206171828.GD8584@leon.nu> (raw)
In-Reply-To: <20160206135041.11630.77019.stgit-u2TXY/5TJkdZ7WVY1cDZ9q2pdiUAq4bhAL8bYrjMMd8@public.gmane.org>

A number of very minor comments:
1. Please remove the dot at the end of the commit message title.

On Sat, Feb 06, 2016 at 08:50:41AM -0500, Alex Estrin wrote:
> A narrow window for a race condition still exists between
> the multicast join thread and the *dev_flush workers.
> A kernel crash caused by prolonged, erratic link state changes
> was observed (most likely due to faulty cabling):
> 
> [167275.656270] BUG: unable to handle kernel NULL pointer dereference at
> 0000000000000020
> [167275.665973] IP: [<ffffffffa05f8f2e>] ipoib_mcast_join+0xae/0x1d0 [ib_ipoib]
> [167275.674443] PGD 0
> [167275.677373] Oops: 0000 [#1] SMP
> ...
> [167275.977530] Call Trace:
> [167275.982225]  [<ffffffffa05f92f0>] ? ipoib_mcast_free+0x200/0x200 [ib_ipoib]
> [167275.992024]  [<ffffffffa05fa1b7>] ipoib_mcast_join_task+0x2a7/0x490
> [ib_ipoib]
> [167276.002149]  [<ffffffff8109d5fb>] process_one_work+0x17b/0x470
> [167276.010754]  [<ffffffff8109e3cb>] worker_thread+0x11b/0x400
> [167276.019088]  [<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
> [167276.027737]  [<ffffffff810a5aef>] kthread+0xcf/0xe0
> This is the spot where the crash hit:
> ipoib_mcast_join() {
> ..............
>       rec.qkey      = priv->broadcast->mcmember.qkey;
>                                        ^^^^^^^
> .....
>  }
> The proposed patch should prevent the multicast join task from
> continuing if a link state change is detected.
> 
> Signed-off-by: Alex Estrin <alex.estrin-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> 
> Changes from v1:
> No need to lock again if an error is detected.
> ---
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   14 ++++++++++++--
>  1 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index 050dfa1..3ce2f0a 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -456,7 +456,7 @@ out_locked:
>  	return status;
>  }
>  
> -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
> +static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ib_sa_multicast *multicast;
> @@ -466,6 +466,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
>  	ib_sa_comp_mask comp_mask;
>  	int ret = 0;
>  
> +	if (!priv->broadcast)
> +		return -EINVAL;
> +
>  	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
>  
>  	rec.mgid     = mcast->mcmember.mgid;
> @@ -539,6 +542,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
>  		spin_unlock_irq(&priv->lock);
>  		complete(&mcast->done);
>  	}
> +	return 0;

2. This function is declared as void, so it is an error to add "return 0".

>  }
>  
>  void ipoib_mcast_join_task(struct work_struct *work)
> @@ -611,6 +615,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  	 * and attached
>  	 */
>  	list_for_each_entry(mcast, &priv->multicast_list, list) {
> +		if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
> +			mcast = NULL;
> +			delay_until = 0;
> +			goto out;
> +		}
>  		if (IS_ERR_OR_NULL(mcast->mc) &&
>  		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
>  		    (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
> @@ -621,7 +630,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  				init_completion(&mcast->done);
>  				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>  				spin_unlock_irq(&priv->lock);
> -				ipoib_mcast_join(dev, mcast);
> +				if (ipoib_mcast_join(dev, mcast) != 0)

3. There is no need to compare with 0. Just use the "if (ipoib...(...))"
construction.

> +					return;
>  				spin_lock_irq(&priv->lock);
>  			} else if (!delay_until ||
>  				 time_before(mcast->delay_until, delay_until))
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2016-02-06 17:18 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-06 13:50 [PATCH v2] IB/ipoib: fix for rare multicast join race condition Alex Estrin
     [not found] ` <20160206135041.11630.77019.stgit-u2TXY/5TJkdZ7WVY1cDZ9q2pdiUAq4bhAL8bYrjMMd8@public.gmane.org>
2016-02-06 17:18   ` Leon Romanovsky [this message]
     [not found]     ` <20160206171828.GD8584-2ukJVAZIZ/Y@public.gmane.org>
2016-02-08 12:23       ` Estrin, Alex
     [not found]         ` <F3529576D8E232409F431C309E29399328F6C7FE-8k97q/ur5Z1cIJlls4ac1rfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2016-02-08 13:22           ` leon-2ukJVAZIZ/Y
2016-02-08 13:34   ` Erez Shitrit
     [not found]     ` <CAAk-MO8b_bpb=cd0Ki7wu3sUSG_rC+fS0sxhmdn5cA-91WfR4g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-02-08 16:43       ` Estrin, Alex
     [not found]         ` <F3529576D8E232409F431C309E29399328F6C892-8k97q/ur5Z1cIJlls4ac1rfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2016-02-09  7:18           ` Leon Romanovsky
     [not found]             ` <20160209071847.GA14741-2ukJVAZIZ/Y@public.gmane.org>
2016-02-09 12:06               ` Estrin, Alex

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160206171828.GD8584@leon.nu \
    --to=leon-2ukjvaziz/y@public.gmane.org \
    --cc=alex.estrin-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.