* [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref
2016-07-29 20:36 [PATCH v2 0/2] iwarp device removal deadlock fixes Steve Wise
@ 2016-07-29 18:00 ` Steve Wise
2016-07-31 14:29 ` Sagi Grimberg
2016-07-29 18:00 ` [PATCH v2 2/2] iw_cm: free cm_id resources on " Steve Wise
2016-08-02 17:24 ` [PATCH v2 0/2] iwarp device removal deadlock fixes Doug Ledford
2 siblings, 1 reply; 5+ messages in thread
From: Steve Wise @ 2016-07-29 18:00 UTC (permalink / raw)
Blocking in c4iw_destroy_qp() causes a deadlock when apps destroy a qp
or disconnect a cm_id from their cm event handler function. There is
no need to block here anyway, so just replace the refcnt atomic with a
kref object and free the memory on the last put.
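For illustration, here is a minimal, self-contained sketch of the free-on-last-put
kref pattern the patch switches to; the struct foo object and foo_*() helpers are
made-up examples, not iw_cxgb4 code:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct foo {
	struct kref kref;
	/* ... object state ... */
};

/* Runs only when the final reference is dropped. */
static void foo_release(struct kref *kref)
{
	struct foo *f = container_of(kref, struct foo, kref);

	kfree(f);
}

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		kref_init(&f->kref);	/* refcount starts at 1 */
	return f;
}

static void foo_get(struct foo *f)
{
	kref_get(&f->kref);
}

/* Never blocks: the last put frees the object via foo_release(). */
static void foo_put(struct foo *f)
{
	kref_put(&f->kref, foo_release);
}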
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  2 +-
 drivers/infiniband/hw/cxgb4/qp.c       | 21 ++++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f6f34a7..0420c21 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -472,7 +472,7 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- atomic_t refcnt;
+ struct kref kref;
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 0d461f3..770531a3 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}
+void _free_qp(struct kref *kref)
+{
+ struct c4iw_qp *qhp;
+
+ qhp = container_of(kref, struct c4iw_qp, kref);
+ PDBG("%s qhp %p\n", __func__, qhp);
+ kfree(qhp);
+}
+
void c4iw_qp_add_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_c4iw_qp(qp)->refcnt));
+ kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt)))
- wake_up(&(to_c4iw_qp(qp)->wait));
+ kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -1592,8 +1600,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
wait_event(qhp->wait, !qhp->ep);
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
spin_lock_irq(&rhp->lock);
if (!list_empty(&qhp->db_fc_entry))
@@ -1606,8 +1612,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ c4iw_qp_rem_ref(ib_qp);
+
PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
- kfree(qhp);
return 0;
}
@@ -1704,7 +1711,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
+ kref_init(&qhp->kref);
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
--
2.7.0
* [PATCH v2 2/2] iw_cm: free cm_id resources on the last deref
2016-07-29 20:36 [PATCH v2 0/2] iwarp device removal deadlock fixes Steve Wise
2016-07-29 18:00 ` [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref Steve Wise
@ 2016-07-29 18:00 ` Steve Wise
2016-08-02 17:24 ` [PATCH v2 0/2] iwarp device removal deadlock fixes Doug Ledford
2 siblings, 0 replies; 5+ messages in thread
From: Steve Wise @ 2016-07-29 18:00 UTC (permalink / raw)
Remove the complicated logic that decides whether to free the iw_cm_id
inside the iw_cm event handlers or when an application thread destroys
the cm_id. Also remove the code in iw_destroy_cm_id() that blocks the
application until all references are removed; this blocking can cause
a deadlock when disconnecting or destroying cm_ids inside an rdma_cm
event handler. Simply letting the last deref of the iw_cm_id free the
memory is cleaner and avoids this potential deadlock. A new flag,
IWCM_F_DROP_EVENTS, is set when the cm_id is marked for destruction;
any events still pending on the iw_cm_id are then dropped as they are
processed rather than posted upstream.
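To illustrate the intent (a sketch only; the example_* names, the
EXAMPLE_F_DROP_EVENTS flag, and the deliver_event_upstream() helper are
hypothetical, not the iwcm code): destruction marks the object, and the
deferred event work drops anything still queued before dropping its
reference.

#include <linux/bitops.h>
#include <linux/kref.h>
#include <linux/printk.h>
#include <linux/slab.h>

#define EXAMPLE_F_DROP_EVENTS	1

struct example_id {
	unsigned long flags;
	struct kref kref;
};

static void example_release(struct kref *kref)
{
	kfree(container_of(kref, struct example_id, kref));
}

static void example_put(struct example_id *id)
{
	kref_put(&id->kref, example_release);	/* last put frees the id */
}

/* Stand-in for posting the event to the consumer's handler. */
static void deliver_event_upstream(struct example_id *id, int event)
{
}

/* Destroy no longer waits for the refcount to hit zero; it just marks
 * the id so queued events get discarded, then drops its reference. */
static void example_destroy(struct example_id *id)
{
	set_bit(EXAMPLE_F_DROP_EVENTS, &id->flags);
	/* ... tear down connection state ... */
	example_put(id);
}

/* Deferred event work; each queued event holds its own reference. */
static void example_event_work(struct example_id *id, int event)
{
	if (test_bit(EXAMPLE_F_DROP_EVENTS, &id->flags))
		pr_debug("dropping event %d\n", event);
	else
		deliver_event_upstream(id, event);
	example_put(id);	/* may be the final deref */
}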
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
 drivers/infiniband/core/iwcm.c | 54 +++++++++++-----------------------
 drivers/infiniband/core/iwcm.h |  2 +-
 2 files changed, 18 insertions(+), 38 deletions(-)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index f057204..357624f 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
/*
* Release a reference on cm_id. If the last reference is being
- * released, enable the waiting thread (in iw_destroy_cm_id) to
- * get woken up, and return 1 if a thread is already waiting.
+ * released, free the cm_id and return 1.
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
if (atomic_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
- complete(&cm_id_priv->destroy_comp);
+ free_cm_id(cm_id_priv);
return 1;
}
@@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id)
static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
- int cb_destroy;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- /*
- * Test bit before deref in case the cm_id gets freed on another
- * thread.
- */
- cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
- free_cm_id(cm_id_priv);
- }
+ (void)iwcm_deref_id(cm_id_priv);
}
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
wait_event(cm_id_priv->connect_wait,
!test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+ /*
+ * Since we're deleting the cm_id, drop any events that
+ * might arrive before the last dereference.
+ */
+ set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
+
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
@@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
-
destroy_cm_id(cm_id);
-
- wait_for_completion(&cm_id_priv->destroy_comp);
-
- free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
ret = cm_id->cm_handler(cm_id, iw_event);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
- set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- destroy_cm_id(cm_id);
- if (atomic_read(&cm_id_priv->refcount)==0)
- free_cm_id(cm_id_priv);
+ iw_destroy_cm_id(cm_id);
}
out:
@@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work)
unsigned long flags;
int empty;
int ret = 0;
- int destroy_id;
spin_lock_irqsave(&cm_id_priv->lock, flags);
empty = list_empty(&cm_id_priv->work_list);
@@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work)
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = process_event(cm_id_priv, &levent);
- if (ret) {
- set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- destroy_cm_id(&cm_id_priv->id);
- }
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
- if (iwcm_deref_id(cm_id_priv)) {
- if (destroy_id) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
- free_cm_id(cm_id_priv);
- }
+ if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
+ ret = process_event(cm_id_priv, &levent);
+ if (ret)
+ destroy_cm_id(&cm_id_priv->id);
+ } else
+ pr_debug("dropping event %d\n", levent.event);
+ if (iwcm_deref_id(cm_id_priv))
return;
- }
if (empty)
return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 3f6cc82..82c2cd1 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -56,7 +56,7 @@ struct iwcm_id_private {
struct list_head work_free_list;
};
-#define IWCM_F_CALLBACK_DESTROY 1
+#define IWCM_F_DROP_EVENTS 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */
--
2.7.0
* [PATCH v2 0/2] iwarp device removal deadlock fixes
@ 2016-07-29 20:36 Steve Wise
2016-07-29 18:00 ` [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref Steve Wise
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Steve Wise @ 2016-07-29 20:36 UTC (permalink / raw)
This series fixes the deadlock issue discovered while testing nvmf/rdma
handling of rdma device removal events from the rdma_cm. For a discussion
of the deadlock that can happen, see
http://lists.infradead.org/pipermail/linux-nvme/2016-July/005440.html.
For my description of the deadlock itself, see this post in the above thread:
http://lists.infradead.org/pipermail/linux-nvme/2016-July/005465.html
In a nutshell, iw_cxgb4 and the iw_cm block during qp/cm_id destruction
until all references are removed. Combined with the iwarp CM passing
disconnect events up to the rdma_cm during disconnect and/or qp/cm_id
destruction, this leads to a deadlock.

My proposed solution is to remove the need for iw_cxgb4 and iw_cm to
block during object destruction until the refcnts reach 0, and instead
defer freeing the object memory until the last deref is done, which is
standard practice in much of the Linux kernel. This allows all the
qps/cm_ids to be destroyed without blocking, and all the object memory
ends up being freed when the application's device_remove event handler
returns to the rdma_cm.
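To make the deadlock shape concrete, here is a minimal sketch using
hypothetical names (example_obj, example_destroy_blocking(),
app_event_handler(), EXAMPLE_DEVICE_REMOVAL); the blocking destroy
mirrors the atomic_dec()/wait_event() pattern that patch 1 removes:

#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/wait.h>

#define EXAMPLE_DEVICE_REMOVAL	1

struct example_obj {
	atomic_t refcnt;
	wait_queue_head_t wait;
};

/* Old behaviour: destruction sleeps until every reference is gone. */
static void example_destroy_blocking(struct example_obj *obj)
{
	atomic_dec(&obj->refcnt);
	wait_event(obj->wait, !atomic_read(&obj->refcnt));
	kfree(obj);
}

/* Upcall made while the CM still holds a reference on obj.  Calling the
 * blocking destroy from here can never finish, because the reference it
 * waits on is only dropped after this handler returns. */
static int app_event_handler(struct example_obj *obj, int event)
{
	if (event == EXAMPLE_DEVICE_REMOVAL)
		example_destroy_blocking(obj);	/* deadlock */
	return 0;
}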
This series is needed along with Sagi's fixes from:
https://www.spinics.net/lists/linux-rdma/msg38715.html
Hey Faisal, it would be great to get some review/test tags from Intel
on the iw_cm change. Thanks!
Changes since v1:
- reworded commit text for the iw_cm patch
- added an iw_cm_id flag to drop pending events when the cm_id has
been marked for destruction.
---
Steve Wise (2):
iw_cxgb4: don't block in destroy_qp awaiting the last deref
iw_cm: free cm_id resources on the last deref
 drivers/infiniband/core/iwcm.c         | 54 +++++++++++-----------------------
 drivers/infiniband/core/iwcm.h         |  2 +-
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  2 +-
 drivers/infiniband/hw/cxgb4/qp.c       | 21 ++++++++-----
 4 files changed, 33 insertions(+), 46 deletions(-)
--
2.7.0
* [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref
2016-07-29 18:00 ` [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref Steve Wise
@ 2016-07-31 14:29 ` Sagi Grimberg
0 siblings, 0 replies; 5+ messages in thread
From: Sagi Grimberg @ 2016-07-31 14:29 UTC (permalink / raw)
Looks good,
Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
* [PATCH v2 0/2] iwarp device removal deadlock fixes
2016-07-29 20:36 [PATCH v2 0/2] iwarp device removal deadlock fixes Steve Wise
2016-07-29 18:00 ` [PATCH v2 1/2] iw_cxgb4: don't block in destroy_qp awaiting the last deref Steve Wise
2016-07-29 18:00 ` [PATCH v2 2/2] iw_cm: free cm_id resources on " Steve Wise
@ 2016-08-02 17:24 ` Doug Ledford
2 siblings, 0 replies; 5+ messages in thread
From: Doug Ledford @ 2016-08-02 17:24 UTC (permalink / raw)
On Fri, 2016-07-29 at 13:36 -0700, Steve Wise wrote:
> This series fixes the deadlock issue discovered while testing
> nvmf/rdma handling of rdma device removal events from the rdma_cm.
> For a discussion of the deadlock that can happen, see
> http://lists.infradead.org/pipermail/linux-nvme/2016-July/005440.html.
>
> For my description of the deadlock itself, see this post in the above
> thread:
> http://lists.infradead.org/pipermail/linux-nvme/2016-July/005465.html
>
> In a nutshell, iw_cxgb4 and the iw_cm block during qp/cm_id
> destruction until all references are removed. Combined with the iwarp
> CM passing disconnect events up to the rdma_cm during disconnect
> and/or qp/cm_id destruction, this leads to a deadlock.
>
> My proposed solution is to remove the need for iw_cxgb4 and iw_cm to
> block during object destruction until the refcnts reach 0, and
> instead defer freeing the object memory until the last deref is done,
> which is standard practice in much of the Linux kernel. This allows
> all the qps/cm_ids to be destroyed without blocking, and all the
> object memory ends up being freed when the application's
> device_remove event handler returns to the rdma_cm.
>
> This series is needed along with Sagi's fixes from:
> https://www.spinics.net/lists/linux-rdma/msg38715.html
>
> Hey Faisal, it would be great to get some review/test tags from Intel
> on the iw_cm change. Thanks!
>
> Changes since v1:
>
> - reworded commit text for the iw_cm patch
>
> - added an iw_cm_id flag to drop pending events when the cm_id has
>   been marked for destruction.
>
> ---
>
> Steve Wise (2):
>   iw_cxgb4: don't block in destroy_qp awaiting the last deref
>   iw_cm: free cm_id resources on the last deref
>
>  drivers/infiniband/core/iwcm.c         | 54 +++++++++++-----------------------
>  drivers/infiniband/core/iwcm.h         |  2 +-
>  drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  2 +-
>  drivers/infiniband/hw/cxgb4/qp.c       | 21 ++++++++-----
>  4 files changed, 33 insertions(+), 46 deletions(-)
>
Series applied, thanks.
--
Doug Ledford <dledford at redhat.com>
GPG KeyID: 0E572FDD