linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Erez Shitrit <erezsh-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
To: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	Or Gerlitz <gerlitz.or-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,
	Erez Shitrit <erezsh-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: Re: [PATCH 8/9] IB/ipoib: deserialize multicast joins
Date: Tue, 03 Mar 2015 11:54:39 +0200	[thread overview]
Message-ID: <54F584DF.8080603@dev.mellanox.co.il> (raw)
In-Reply-To: <1425310145.2354.26.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

On 3/2/2015 5:29 PM, Doug Ledford wrote:
> On Sun, 2015-03-01 at 15:58 +0200, Erez Shitrit wrote:
>> On 2/22/2015 2:27 AM, Doug Ledford wrote:
>>> Allow the ipoib layer to attempt to join all outstanding multicast
>>> groups at once.  The ib_sa layer will serialize multiple attempts to
>>> join the same group, but will process attempts to join different groups
>>> in parallel.  Take advantage of that.
>>>
>>> In order to make this happen, change the mcast_join_thread to loop
>>> through all needed joins, sending a join request for each one that we
>>> still need to join.  There are a few special cases we handle though:
>>>
>>> 1) Don't attempt to join anything but the broadcast group until the join
>>> of the broadcast group has succeeded.
>>> 2) No longer restart the join task at the end of completion handling.
>>> If we completed successfully, we are done.  The join task now needs kicked
>>> either by mcast_send or mcast_restart_task or mcast_start_thread, but
>>> should not need started anytime else except when scheduling a backoff
>>> attempt to rejoin.
>>> 3) No longer use separate join/completion routines for regular and
>>> sendonly joins, pass them all through the same routine and just do the
>>> right thing based on the SENDONLY join flag.
>>> 4) Only try to join a SENDONLY join twice, then drop the packets and
>>> quit trying.  We leave the mcast group in the list so that if we get a
>>> new packet, all that we have to do is queue up the packet and restart
>>> the join task and it will automatically try to join twice and then
>>> either send or flush the queue again.
>>>
>>> Signed-off-by: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
>>> ---
>>>    drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 250 ++++++++-----------------
>>>    1 file changed, 82 insertions(+), 168 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>>> index 277e7ac7c4d..c670d9c2cda 100644
>>> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>>> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>>> @@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>>>    	return 0;
>>>    }
>>>    
>>> -static int
>>> -ipoib_mcast_sendonly_join_complete(int status,
>>> -				   struct ib_sa_multicast *multicast)
>>> -{
>>> -	struct ipoib_mcast *mcast = multicast->context;
>>> -	struct net_device *dev = mcast->dev;
>>> -	struct ipoib_dev_priv *priv = netdev_priv(dev);
>>> -
>>> -	/*
>>> -	 * We have to take the mutex to force mcast_sendonly_join to
>>> -	 * return from ib_sa_multicast_join and set mcast->mc to a
>>> -	 * valid value.  Otherwise we were racing with ourselves in
>>> -	 * that we might fail here, but get a valid return from
>>> -	 * ib_sa_multicast_join after we had cleared mcast->mc here,
>>> -	 * resulting in mis-matched joins and leaves and a deadlock
>>> -	 */
>>> -	mutex_lock(&mcast_mutex);
>>> -
>>> -	/* We trap for port events ourselves. */
>>> -	if (status == -ENETRESET) {
>>> -		status = 0;
>>> -		goto out;
>>> -	}
>>> -
>>> -	if (!status)
>>> -		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
>>> -
>>> -	if (status) {
>>> -		if (mcast->logcount++ < 20)
>>> -			ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
>>> -					"join failed for %pI6, status %d\n",
>>> -					mcast->mcmember.mgid.raw, status);
>>> -
>>> -		/* Flush out any queued packets */
>>> -		netif_tx_lock_bh(dev);
>>> -		while (!skb_queue_empty(&mcast->pkt_queue)) {
>>> -			++dev->stats.tx_dropped;
>>> -			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
>>> -		}
>>> -		netif_tx_unlock_bh(dev);
>>> -		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
>>> -	} else {
>>> -		mcast->backoff = 1;
>>> -		mcast->delay_until = jiffies;
>>> -		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
>>> -	}
>>> -out:
>>> -	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>>> -	if (status)
>>> -		mcast->mc = NULL;
>>> -	complete(&mcast->done);
>>> -	mutex_unlock(&mcast_mutex);
>>> -	return status;
>>> -}
>>> -
>>> -static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
>>> -{
>>> -	struct net_device *dev = mcast->dev;
>>> -	struct ipoib_dev_priv *priv = netdev_priv(dev);
>>> -	struct ib_sa_mcmember_rec rec = {
>>> -#if 0				/* Some SMs don't support send-only yet */
>>> -		.join_state = 4
>>> -#else
>>> -		.join_state = 1
>>> -#endif
>>> -	};
>>> -	int ret = 0;
>>> -
>>> -	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
>>> -		ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
>>> -				"multicast joins\n");
>>> -		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>>> -		complete(&mcast->done);
>>> -		return -ENODEV;
>>> -	}
>>> -
>>> -	rec.mgid     = mcast->mcmember.mgid;
>>> -	rec.port_gid = priv->local_gid;
>>> -	rec.pkey     = cpu_to_be16(priv->pkey);
>>> -
>>> -	mutex_lock(&mcast_mutex);
>>> -	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
>>> -					 priv->port, &rec,
>>> -					 IB_SA_MCMEMBER_REC_MGID	|
>>> -					 IB_SA_MCMEMBER_REC_PORT_GID	|
>>> -					 IB_SA_MCMEMBER_REC_PKEY	|
>>> -					 IB_SA_MCMEMBER_REC_JOIN_STATE,
>>> -					 GFP_ATOMIC,
>>> -					 ipoib_mcast_sendonly_join_complete,
>>> -					 mcast);
>>> -	if (IS_ERR(mcast->mc)) {
>>> -		ret = PTR_ERR(mcast->mc);
>>> -		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>>> -		ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
>>> -			   "failed (ret = %d)\n", ret);
>>> -		complete(&mcast->done);
>>> -	} else {
>>> -		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
>>> -				"sendonly join\n", mcast->mcmember.mgid.raw);
>>> -	}
>>> -	mutex_unlock(&mcast_mutex);
>>> -
>>> -	return ret;
>>> -}
>>> -
>>>    void ipoib_mcast_carrier_on_task(struct work_struct *work)
>>>    {
>>>    	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
>>> @@ -452,7 +347,9 @@ static int ipoib_mcast_join_complete(int status,
>>>    	struct net_device *dev = mcast->dev;
>>>    	struct ipoib_dev_priv *priv = netdev_priv(dev);
>>>    
>>> -	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
>>> +	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
>>> +			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
>>> +			"sendonly " : "",
>>>    			mcast->mcmember.mgid.raw, status);
>>>    
>>>    	/*
>>> @@ -477,27 +374,52 @@ static int ipoib_mcast_join_complete(int status,
>>>    	if (!status) {
>>>    		mcast->backoff = 1;
>>>    		mcast->delay_until = jiffies;
>>> -		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
>>>    
>>>    		/*
>>>    		 * Defer carrier on work to priv->wq to avoid a
>>> -		 * deadlock on rtnl_lock here.
>>> +		 * deadlock on rtnl_lock here.  Requeue our multicast
>>> +		 * work too, which will end up happening right after
>>> +		 * our carrier on task work and will allow us to
>>> +		 * send out all of the non-broadcast joins
>>>    		 */
>>> -		if (mcast == priv->broadcast)
>>> +		if (mcast == priv->broadcast) {
>>>    			queue_work(priv->wq, &priv->carrier_on_task);
>>> +			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
>>> +		}
>>>    	} else {
>>>    		if (mcast->logcount++ < 20) {
>>>    			if (status == -ETIMEDOUT || status == -EAGAIN) {
>>> -				ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
>>> +				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
>>> +						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
>>>    						mcast->mcmember.mgid.raw, status);
>>>    			} else {
>>> -				ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
>>> +				ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
>>> +						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
>>>    					   mcast->mcmember.mgid.raw, status);
>>>    			}
>>>    		}
>>>    
>>> -		/* Requeue this join task with a backoff delay */
>>> -		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
>>> +		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
>>> +		    mcast->backoff >= 2) {
>>> +			/*
>>> +			 * We only retry sendonly joins once before we drop
>>> +			 * the packet and quit trying to deal with the
>>> +			 * group.  However, we leave the group in the
>>> +			 * mcast list as an unjoined group.  If we want to
>>> +			 * try joining again, we simply queue up a packet
>>> +			 * and restart the join thread.  The empty queue
>>> +			 * is why the join thread ignores this group.
>>> +			 */
>> Question: the sendonly is at the list for ever? looks like that, and it
>> is prior to your patches, so probably it should be sent in some other
>> patch to solve that.
> Correct.  That logic is unchanged.  It probably deserves some sort of
> timeout such that after X seconds with no traffic on a sendonly group,
> we leave that group and only rejoin if we get a new send.  I had thought
> about making it something really long, like 20 minutes, but thinking
> about it is all I've done.  I didn't code anything up or include that in
> these patches.
>
OK, Thanks. We we will need to send a patch for that.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2015-03-03  9:54 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-22  0:26 [PATCH 0/9] IB/ipoib: fixup multicast locking issues Doug Ledford
     [not found] ` <cover.1424562072.git.dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-02-22  0:26   ` [PATCH 1/9] IB/ipoib: factor out ah flushing Doug Ledford
     [not found]     ` <b06eb720c2f654f5ecdb72c66f4e89149d1c24ec.1424562072.git.dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-02-26 13:28       ` Erez Shitrit
     [not found]         ` <54EF1F67.4000001-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-02-26 16:27           ` Doug Ledford
     [not found]             ` <1424968046.2543.18.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-01  6:47               ` Erez Shitrit
     [not found]                 ` <54F2B61C.9080308-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-02 15:09                   ` Doug Ledford
     [not found]                     ` <1425308967.2354.19.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-03  9:59                       ` Erez Shitrit
     [not found]                         ` <54F585E9.7070704-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-13  8:39                           ` Or Gerlitz
     [not found]                             ` <CAJ3xEMgxxHu5BQdADaRe-Grtf4rm1LMfsCRiDyF6ToPdV_62OA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-03-15 18:42                               ` Doug Ledford
     [not found]                                 ` <3A0A417D-BFE4-475C-BAB3-C3FB1D313022-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-16 15:24                                   ` Erez Shitrit
     [not found]                                     ` <5506F5B2.1080900-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-16 16:06                                       ` Doug Ledford
     [not found]                                         ` <ADC46FD9-3179-4182-949D-1884C9D31757-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-16 16:51                                           ` Erez Shitrit
2015-03-16 18:00                                       ` Doug Ledford
2015-02-22  0:27   ` [PATCH 2/9] IB/ipoib: change init sequence ordering Doug Ledford
2015-02-22  0:27   ` [PATCH 3/9] IB/ipoib: Consolidate rtnl_lock tasks in workqueue Doug Ledford
2015-02-22  0:27   ` [PATCH 4/9] IB/ipoib: Make the carrier_on_task race aware Doug Ledford
2015-02-22  0:27   ` [PATCH 5/9] IB/ipoib: Use dedicated workqueues per interface Doug Ledford
     [not found]     ` <1cfdf15058cea312f07c2907490a1d7300603c40.1424562072.git.dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-02-23 16:48       ` Or Gerlitz
2015-02-22  0:27   ` [PATCH 6/9] IB/ipoib: No longer use flush as a parameter Doug Ledford
2015-02-22  0:27   ` [PATCH 7/9] IB/ipoib: fix MCAST_FLAG_BUSY usage Doug Ledford
     [not found]     ` <9d657f64ee961ee3b3233520d8b499b234a42bcd.1424562072.git.dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-01  9:31       ` Erez Shitrit
     [not found]         ` <54F2DC81.304-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-02 15:27           ` Doug Ledford
     [not found]             ` <1425310036.2354.24.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-03  9:53               ` Erez Shitrit
2015-02-22  0:27   ` [PATCH 8/9] IB/ipoib: deserialize multicast joins Doug Ledford
     [not found]     ` <a24ade295dfdd1369aac47a978003569ec190952.1424562072.git.dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-01 13:58       ` Erez Shitrit
     [not found]         ` <54F31AEC.3010001-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-03-02 15:29           ` Doug Ledford
     [not found]             ` <1425310145.2354.26.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-03  9:54               ` Erez Shitrit [this message]
2015-02-22  0:27   ` [PATCH 9/9] IB/ipoib: drop mcast_mutex usage Doug Ledford
     [not found]     ` <767f4c41779db63ce8c6dbba04b21959aba70ef9.1424562072.git.dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-02-23 16:56       ` Or Gerlitz
     [not found]         ` <CAJ3xEMgLPF9pCwQDy9QyL9fAERJXJRXN2gBj3nhuXUCcbfCMPg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-02-23 17:41           ` Doug Ledford
2015-02-22 21:34   ` [PATCH 0/9] IB/ipoib: fixup multicast locking issues Or Gerlitz
     [not found]     ` <CAJ3xEMgj=ATKLt0MA67c3WefCrG1hZ59eSrhpD-u_dxLJe2kfg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-02-22 21:56       ` Doug Ledford
     [not found]         ` <1424642176.4847.2.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-02-22 21:57           ` Doug Ledford
2015-03-13  8:41   ` Or Gerlitz
     [not found]     ` <CAJ3xEMjHrTH_F=zPDsH9A9qRWo=AYN4sgbsdDKV62nzBkB5kXA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-03-15 18:52       ` Doug Ledford
     [not found]         ` <F42024C5-60A5-4B92-B4AC-4D225E2C0FC3-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-03-31 17:04           ` ira.weiny
     [not found]             ` <20150331170452.GA6261-W4f6Xiosr+yv7QzWx2u06xL4W9x8LtSr@public.gmane.org>
2015-03-31 20:42               ` Or Gerlitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54F584DF.8080603@dev.mellanox.co.il \
    --to=erezsh-ldsdmyg8hgv8yrgs2mwiifqbs+8scbdb@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=erezsh-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=gerlitz.or-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=roland-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).