* [PATCH v2 2/2] iw_cm: free cm_id resources on the last deref
2016-07-29 20:36 [PATCH v2 0/2] iwarp device removal deadlock fixes Steve Wise
@ 2016-07-29 18:00 ` Steve Wise
2016-07-29 18:00 ` [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting " Steve Wise
2016-08-02 17:24 ` [PATCH v2 0/2] iwarp device removal deadlock fixes Doug Ledford
2 siblings, 0 replies; 5+ messages in thread
From: Steve Wise @ 2016-07-29 18:00 UTC (permalink / raw)
Remove the complicated logic to free the iw_cm_id inside iw_cm
event handlers vs when an application thread destroys the cm_id.
Also remove the block in iw_destroy_cm_id() to block the application
until all references are removed. This block can cause a deadlock when
disconnecting or destroying cm_ids inside an rdma_cm event handler.
Simply allowing the last deref of the iw_cm_id to free the memory
is cleaner and avoids this potential deadlock. Also a flag is added,
IW_CM_DROP_EVENTS, that is set when the cm_id is marked for destruction.
If any events are pending on this iw_cm_id, then as they are processed
they will be dropped vs posted upstream if IW_CM_DROP_EVENTS is set.
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
drivers/infiniband/core/iwcm.c | 54 +++++++++++++-----------------------------
drivers/infiniband/core/iwcm.h | 2 +-
2 files changed, 18 insertions(+), 38 deletions(-)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index f057204..357624f 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
/*
* Release a reference on cm_id. If the last reference is being
- * released, enable the waiting thread (in iw_destroy_cm_id) to
- * get woken up, and return 1 if a thread is already waiting.
+ * released, free the cm_id and return 1.
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
if (atomic_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
- complete(&cm_id_priv->destroy_comp);
+ free_cm_id(cm_id_priv);
return 1;
}
@@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id)
static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
- int cb_destroy;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- /*
- * Test bit before deref in case the cm_id gets freed on another
- * thread.
- */
- cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
- free_cm_id(cm_id_priv);
- }
+ (void)iwcm_deref_id(cm_id_priv);
}
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
wait_event(cm_id_priv->connect_wait,
!test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+ /*
+ * Since we're deleting the cm_id, drop any events that
+ * might arrive before the last dereference.
+ */
+ set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
+
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
@@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
-
destroy_cm_id(cm_id);
-
- wait_for_completion(&cm_id_priv->destroy_comp);
-
- free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
ret = cm_id->cm_handler(cm_id, iw_event);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
- set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- destroy_cm_id(cm_id);
- if (atomic_read(&cm_id_priv->refcount)==0)
- free_cm_id(cm_id_priv);
+ iw_destroy_cm_id(cm_id);
}
out:
@@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work)
unsigned long flags;
int empty;
int ret = 0;
- int destroy_id;
spin_lock_irqsave(&cm_id_priv->lock, flags);
empty = list_empty(&cm_id_priv->work_list);
@@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work)
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = process_event(cm_id_priv, &levent);
- if (ret) {
- set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- destroy_cm_id(&cm_id_priv->id);
- }
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- if (iwcm_deref_id(cm_id_priv)) {
- if (destroy_id) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
- free_cm_id(cm_id_priv);
- }
+ if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
+ ret = process_event(cm_id_priv, &levent);
+ if (ret)
+ destroy_cm_id(&cm_id_priv->id);
+ } else
+ pr_debug("dropping event %d\n", levent.event);
+ if (iwcm_deref_id(cm_id_priv))
return;
- }
if (empty)
return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 3f6cc82..82c2cd1 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -56,7 +56,7 @@ struct iwcm_id_private {
struct list_head work_free_list;
};
-#define IWCM_F_CALLBACK_DESTROY 1
+#define IWCM_F_DROP_EVENTS 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */
--
2.7.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref
2016-07-29 20:36 [PATCH v2 0/2] iwarp device removal deadlock fixes Steve Wise
2016-07-29 18:00 ` [PATCH v2 2/2] iw_cm: free cm_id resources on the last deref Steve Wise
@ 2016-07-29 18:00 ` Steve Wise
2016-07-31 14:29 ` Sagi Grimberg
2016-08-02 17:24 ` [PATCH v2 0/2] iwarp device removal deadlock fixes Doug Ledford
2 siblings, 1 reply; 5+ messages in thread
From: Steve Wise @ 2016-07-29 18:00 UTC (permalink / raw)
Blocking in c4iw_destroy_qp() causes a deadlock when apps destroy a qp
or disconnect a cm_id from their cm event handler function. There is
no need to block here anyway, so just replace the refcnt atomic with a
kref object and free the memory on the last put.
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +-
drivers/infiniband/hw/cxgb4/qp.c | 21 ++++++++++++++-------
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f6f34a7..0420c21 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -472,7 +472,7 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- atomic_t refcnt;
+ struct kref kref;
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 0d461f3..770531a3 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}
+void _free_qp(struct kref *kref)
+{
+ struct c4iw_qp *qhp;
+
+ qhp = container_of(kref, struct c4iw_qp, kref);
+ PDBG("%s qhp %p\n", __func__, qhp);
+ kfree(qhp);
+}
+
void c4iw_qp_add_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_c4iw_qp(qp)->refcnt));
+ kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt)))
- wake_up(&(to_c4iw_qp(qp)->wait));
+ kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -1592,8 +1600,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
wait_event(qhp->wait, !qhp->ep);
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
spin_lock_irq(&rhp->lock);
if (!list_empty(&qhp->db_fc_entry))
@@ -1606,8 +1612,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ c4iw_qp_rem_ref(ib_qp);
+
PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
- kfree(qhp);
return 0;
}
@@ -1704,7 +1711,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
+ kref_init(&qhp->kref);
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
--
2.7.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH v2 0/2] iwarp device removal deadlock fixes
2016-07-29 20:36 [PATCH v2 0/2] iwarp device removal deadlock fixes Steve Wise
2016-07-29 18:00 ` [PATCH v2 2/2] iw_cm: free cm_id resources on the last deref Steve Wise
2016-07-29 18:00 ` [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting " Steve Wise
@ 2016-08-02 17:24 ` Doug Ledford
2 siblings, 0 replies; 5+ messages in thread
From: Doug Ledford @ 2016-08-02 17:24 UTC (permalink / raw)
On Fri, 2016-07-29 at 13:36 -0700, Steve Wise wrote:
> This series fixes the deadlock issue discovered while testing
> nvmf/rdma
> handling rdma device removal events from the rdma_cm.??For a
> discussion
> of the deadlock that can happen, see
>
> http://lists.infradead.org/pipermail/linux-nvme/2016-July/005440.html
> .
>
> For my description of the deadlock itself, see this post in the above
> thread:
>
> http://lists.infradead.org/pipermail/linux-nvme/2016-July/005465.html
>
> In a nutshell, iw_cxgb4 and the iw_cm block during qp/cm_id
> destruction
> until all references are removed.  This combined with the iwarp CM
> passing
> disconnect events up to the rdma_cm during disconnect and/or qp/cm_id
> destruction
> leads to a deadlock.
>
> My proposed solution is to remove the need for iw_cxgb4 and iw_cm to
> block during object destruction for the refnts to reach 0, but rather
> to
> let the freeing of the object memory be deferred when the last deref
> is
> done, which is SOP in the much of the linux kernel. This allows all
> the
> qps/cm_ids to be destroyed without blocking, and all the object
> memory
> freeing ends up happinging when the application's device_remove event
> handler function returns to the rdma_cm.
>
> This series is needed along with Sagi's fixes from:
> https://www.spinics.net/lists/linux-rdma/msg38715.html
>
> Hey Faisal, it would be great to get some review/test tags from Intel
> on the iw_cm change.  Thanks!
>
> Changes since v1:
>
> - reworded commit text for the iw_cm patch
>
> - added a iw_cm_id flag to drop pending events when the cm_id has
> been marked for destruction.
>
> ---
>
> Steve Wise (2):
>   iw_cxgb4: don't block in destroy_qp awaiting the last deref
>   iw_cm: free cm_id resources on the last deref
>
>  drivers/infiniband/core/iwcm.c         | 54 +++++++++++-------------
> ----------
>  drivers/infiniband/core/iwcm.h         |  2 +-
>  drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  2 +-
>  drivers/infiniband/hw/cxgb4/qp.c       | 21 ++++++++-----
>  4 files changed, 33 insertions(+), 46 deletions(-)
>
Series applied, thanks.
--
Doug Ledford <dledford at redhat.com>
GPG KeyID: 0E572FDD
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.infradead.org/pipermail/linux-nvme/attachments/20160802/ec1255d9/attachment.sig>
^ permalink raw reply [flat|nested] 5+ messages in thread