All of lore.kernel.org
 help / color / mirror / Atom feed
From: Krishnamraju Eraparaju <krishna2@chelsio.com>
To: Sagi Grimberg <sagi@grimberg.me>,
	Steve Wise <larrystevenwise@gmail.com>,
	Bernard Metzler <BMT@zurich.ibm.com>
Cc: "linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
	Jason Gunthorpe <jgg@ziepe.ca>
Subject: Re: [PATCH v3] iwcm: don't hold the irq disabled lock on iw_rem_ref
Date: Wed, 11 Sep 2019 00:51:59 +0530	[thread overview]
Message-ID: <20190910192157.GA5954@chelsio.com> (raw)
In-Reply-To: <5cc42f23-bf60-ca8d-f40c-cbd8875f5756@grimberg.me>

Please review the below patch, I will resubmit this in patch-series
after review.
- As kput_ref handler(siw_free_qp) uses vfree, iwcm can't call
  iw_rem_ref() with spinlocks held. Doing so can cause vfree() to sleep
  with irq disabled.
  Two possible solutions:
  1)With spinlock acquired, take a copy of "cm_id_priv->qp" and update
    it to NULL. And after releasing lock use the copied qp pointer for
    rem_ref().
  2)Replacing issue causing vmalloc()/vfree to kmalloc()/kfree in SIW
    driver, may not be a ideal solution.
  
  Solution 2 may not be ideal as allocating huge contigous memory for
   SQ & RQ doesn't look appropriate.
  
- The structure "siw_base_qp" is getting freed in siw_destroy_qp(), but
  if cm_close_handler() holds the last reference, then siw_free_qp(),
  via cm_close_handler(), tries to get already freed "siw_base_qp" from
  "ib_qp". 
   Hence, "siw_base_qp" should be freed at the end of siw_free_qp().


diff --git a/drivers/infiniband/core/iwcm.c
b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..d5ab69fa598a 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -373,6 +373,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
 {
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
+       struct ib_qp *qp;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        /*
@@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
        set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
+
        switch (cm_id_priv->state) {
        case IW_CM_STATE_LISTEN:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
@@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* Abrupt close of the connection */
-               (void)iwcm_modify_qp_err(cm_id_priv->qp);
+               (void)iwcm_modify_qp_err(qp);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_IDLE:
@@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                BUG();
                break;
        }
-       if (cm_id_priv->qp) {
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
-               cm_id_priv->qp = NULL;
-       }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);

        if (cm_id->mapped) {
                iwpm_remove_mapinfo(&cm_id->local_addr,
&cm_id->m_local_addr);
@@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
                BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
                cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
-               if (cm_id_priv->qp) {
-                       cm_id->device->ops.iw_rem_ref(qp);
-                       cm_id_priv->qp = NULL;
-               }
+               qp = cm_id_priv->qp;
+               cm_id_priv->qp = NULL;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               if (qp)
+                       cm_id->device->ops.iw_rem_ref(qp);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
        }
@@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct
iw_cm_conn_param *iw_param)
                return 0;       /* success */

        spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id_priv->qp) {
-               cm_id->device->ops.iw_rem_ref(qp);
-               cm_id_priv->qp = NULL;
-       }
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
        cm_id_priv->state = IW_CM_STATE_IDLE;
 err:
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id->device->ops.iw_rem_ref(qp);
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        wake_up_all(&cm_id_priv->connect_wait);
        return ret;
@@ -880,6 +882,7 @@ static int cm_conn_rep_handler(struct
iwcm_id_private *cm_id_priv,
 {
        unsigned long flags;
        int ret;
+       struct ib_qp *qp = NULL;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        /*
@@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct
iwcm_id_private *cm_id_priv,
                cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        } else {
                /* REJECTED or RESET */
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+               qp = cm_id_priv->qp;
                cm_id_priv->qp = NULL;
                cm_id_priv->state = IW_CM_STATE_IDLE;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

        if (iw_event->private_data_len)
@@ -944,12 +949,12 @@ static int cm_close_handler(struct iwcm_id_private
*cm_id_priv,
 {
        unsigned long flags;
        int ret = 0;
+       struct ib_qp *qp;
+
        spin_lock_irqsave(&cm_id_priv->lock, flags);
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;

-       if (cm_id_priv->qp) {
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
-               cm_id_priv->qp = NULL;
-       }
        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
        case IW_CM_STATE_CLOSING:
@@ -965,6 +970,8 @@ static int cm_close_handler(struct iwcm_id_private
*cm_id_priv,
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
        return ret;
 }

diff --git a/drivers/infiniband/sw/siw/siw_qp.c
b/drivers/infiniband/sw/siw/siw_qp.c
index 430314c8abd9..cb177688a49f 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1307,6 +1307,7 @@ void siw_free_qp(struct kref *ref)
        struct siw_qp *found, *qp = container_of(ref, struct siw_qp,
ref);
        struct siw_device *sdev = qp->sdev;
        unsigned long flags;
+       struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp);

        if (qp->cep)
                siw_cep_put(qp->cep);
@@ -1327,4 +1328,5 @@ void siw_free_qp(struct kref *ref)
        atomic_dec(&sdev->num_qp);
        siw_dbg_qp(qp, "free QP\n");
        kfree_rcu(qp, rcu);
+       kfree(siw_base_qp);
 }
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c
b/drivers/infiniband/sw/siw/siw_verbs.c
index da52c90e06d4..ac08d84d84cb 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -603,7 +603,6 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp,
struct ib_qp_attr *attr,
 int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
 {
        struct siw_qp *qp = to_siw_qp(base_qp);
-       struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp);
        struct siw_ucontext *uctx =
                rdma_udata_to_drv_context(udata, struct siw_ucontext,
                                          base_ucontext);
@@ -640,7 +639,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct
ib_udata *udata)
        qp->scq = qp->rcq = NULL;

        siw_qp_put(qp);
-       kfree(siw_base_qp);

        return 0;
 }


On Tuesday, September 09/10/19, 2019 at 22:23:13 +0530, Sagi Grimberg wrote:
> 
> >> This may be the final put on a qp and result in freeing
> >> resourcesand should not be done with interrupts disabled.
> > 
> > Hi Sagi,
> > 
> > Few things to consider in fixing this completely:
> >    - there are some other places where iw_rem_ref() should be called
> >      after spinlock critical section. eg: in cm_close_handler(),
> > iw_cm_connect(),...
> >    - Any modifications to "cm_id_priv" should be done with in spinlock
> >      critical section, modifying cm_id_priv->qp outside spinlocks, even
> > with atomic xchg(), might be error prone.
> >    - the structure "siw_base_qp" is getting freed in siw_destroy_qp(),
> >      but it should be done at the end of siw_free_qp().
> 
> Not sure why you say that, at the end of this function ->qp will be null
> anyways...
 Hope the above description and patch answers this.
> 
> >    
> > I am about to finish writing a patch that cover all the above issues.
> > Will test it and submit here by EOD.
> 
> Sure, you take it. Just stumbled on it so thought I'd go ahead and send
> a patch...

  reply	other threads:[~2019-09-10 19:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-04 21:25 [PATCH v3] iwcm: don't hold the irq disabled lock on iw_rem_ref Sagi Grimberg
2019-09-10 11:18 ` Krishnamraju Eraparaju
2019-09-10 16:53   ` Sagi Grimberg
2019-09-10 19:21     ` Krishnamraju Eraparaju [this message]
2019-09-11  9:38       ` Bernard Metzler
2019-09-11 14:42         ` Steve Wise
2019-09-11 15:58           ` Krishnamraju Eraparaju
2019-09-16 16:28             ` Jason Gunthorpe
2019-09-17  9:04               ` Bernard Metzler
2019-09-17 12:47                 ` Krishnamraju Eraparaju
2019-09-17 13:40                   ` Bernard Metzler
2019-09-17 17:20                 ` Sagi Grimberg
2019-09-18 13:43                   ` Krishnamraju Eraparaju
2019-09-21 18:35                     ` Krishnamraju Eraparaju
2019-10-01 17:17                       ` Krishnamraju Eraparaju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190910192157.GA5954@chelsio.com \
    --to=krishna2@chelsio.com \
    --cc=BMT@zurich.ibm.com \
    --cc=jgg@ziepe.ca \
    --cc=larrystevenwise@gmail.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.